This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 33903b825bb HIVE-27714: Iceberg: metadata location overrides can cause data breach - handling default locations. (#4880). (Ayush Saxena, reviewed by Denys Kuzmenko)
33903b825bb is described below

commit 33903b825bb949d2a4c3839688817ac5ebe3fbdb
Author: Ayush Saxena <[email protected]>
AuthorDate: Fri Dec 1 20:38:52 2023 +0530

    HIVE-27714: Iceberg: metadata location overrides can cause data breach - handling default locations. (#4880). (Ayush Saxena, reviewed by Denys Kuzmenko)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  4 +
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 48 ++++++++++--
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 88 +++++++++++++++++++---
 .../hive/ql/metadata/HiveStorageHandler.java       |  1 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     | 25 ++++--
 .../metastore/api/hive_metastoreConstants.java     |  2 +
 6 files changed, 146 insertions(+), 22 deletions(-)
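
In short: with hive.iceberg.mask.default.location enabled, the Iceberg storage
handler rewrites the table's default warehouse prefix to the placeholder
TABLE_DEFAULT_LOCATION before embedding the metadata path in the authorization
URI, so a metadata-location override can no longer dodge policies anchored on
the default path. A hedged before/after sketch of the auth URIs (table name and
paths are made up for illustration):

    // Hypothetical auth URIs for table default.target:
    //
    // flag off (previous behaviour):
    //   iceberg://default/target?snapshot=/warehouse/ext/hive/target/metadata/v2.metadata.json
    //
    // flag on (this commit, same table):
    //   iceberg://default/target?snapshot=TABLE_DEFAULT_LOCATION/metadata/v2.metadata.json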

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 027bab6eb53..ad807386f36 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2226,6 +2226,10 @@ public class HiveConf extends Configuration {
     HIVE_ICEBERG_EXPIRE_SNAPSHOT_NUMTHREADS("hive.iceberg.expire.snapshot.numthreads", 4,
         "The number of threads to be used for deleting files during expire snapshot. If set to 0 or below it uses the" +
             " defult DirectExecutorService"),
+
+    HIVE_ICEBERG_MASK_DEFAULT_LOCATION("hive.iceberg.mask.default.location", false,
+        "If this is set to true the URI for auth will have the default location masked with TABLE_DEFAULT_LOCATION"),
+
     HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
         "If this is set the header for RCFiles will simply be RCF.  If this is 
not\n" +
         "set the header will be that borrowed from sequence files, e.g. SEQ- 
followed\n" +
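
The new flag reads like any other boolean HiveConf setting; a minimal sketch of
enabling it (mirroring how the tests below flip it; usage assumed):

    // Programmatic toggle, as the tests in this commit do:
    HiveConf conf = new HiveConf();
    conf.setBoolean(HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, true);
    // Or per session: SET hive.iceberg.mask.default.location=true;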
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 9cfb210eec3..5f4f97b9f72 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -48,6 +48,7 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.SnapshotContext;
@@ -214,6 +215,8 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   public static final String MERGE_ON_READ = "merge-on-read";
   public static final String STATS = "/stats/snap-";
 
+  public static final String TABLE_DEFAULT_LOCATION = "TABLE_DEFAULT_LOCATION";
+
   /**
    * Function template for producing a custom sort expression function:
   * Takes the source column index and the bucket count to creat a function where Iceberg bucket UDF is used to build
@@ -520,11 +523,11 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
         writer.finish();
         return true;
       } catch (IOException e) {
-        LOG.warn("Unable to write stats to puffin file", e.getMessage());
+        LOG.warn("Unable to write stats to puffin file {}", e.getMessage());
         return false;
       }
     } catch (InvalidObjectException | IOException e) {
-      LOG.warn("Unable to invalidate or merge stats: ", e.getMessage());
+      LOG.warn("Unable to invalidate or merge stats: {}", e.getMessage());
       return false;
     }
   }
@@ -1053,19 +1056,19 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     Optional<String> metadataLocation =
         SessionStateUtil.getProperty(conf, BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
     if (metadataLocation.isPresent()) {
-      authURI.append(encodeString(metadataLocation.get()));
+      authURI.append(getPathForAuth(metadataLocation.get()));
     } else {
       Optional<String> locationProperty =
           SessionStateUtil.getProperty(conf, hive_metastoreConstants.META_TABLE_LOCATION);
       if (locationProperty.isPresent()) {
         // this property is set during the create operation before the hive table was created
         // we are returning a dummy iceberg metadata file
-        authURI.append(encodeString(URI.create(locationProperty.get()).getPath()))
+        authURI.append(getPathForAuth(locationProperty.get()))
             .append(encodeString("/metadata/dummy.metadata.json"));
       } else {
         Table table = IcebergTableUtil.getTable(conf, hmsTable);
-        authURI.append(
-            encodeString(URI.create(((BaseTable) table).operations().current().metadataFileLocation()).getPath()));
+        authURI.append(getPathForAuth(((BaseTable) table).operations().current().metadataFileLocation(),
+            hmsTable.getSd().getLocation()));
       }
     }
     LOG.debug("Iceberg storage handler authorization URI {}", authURI);
@@ -1080,6 +1083,39 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.encode(rawString);
   }
 
+  String getPathForAuth(String locationProperty) {
+    return getPathForAuth(locationProperty,
+        SessionStateUtil.getProperty(conf, hive_metastoreConstants.DEFAULT_TABLE_LOCATION).orElse(null));
+  }
+
+  String getPathForAuth(String locationProperty, String defaultTableLocation) {
+    boolean maskDefaultLocation = conf.getBoolean(HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname,
+        HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION.defaultBoolVal);
+    String location = URI.create(locationProperty).getPath();
+    if (!maskDefaultLocation || defaultTableLocation == null ||
+        !arePathsInSameFs(locationProperty, defaultTableLocation)) {
+      return encodeString(location);
+    }
+    try {
+      Path locationPath = new Path(location);
+      Path defaultLocationPath = locationPath.toUri().getScheme() != null ?
+          FileUtils.makeQualified(new Path(defaultTableLocation), conf) :
+          Path.getPathWithoutSchemeAndAuthority(new Path(defaultTableLocation));
+      return encodeString(location.replaceFirst(defaultLocationPath.toString(), TABLE_DEFAULT_LOCATION));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private boolean arePathsInSameFs(String locationProperty, String defaultTableLocation) {
+    try {
+      return FileUtils.equalsFileSystem(new Path(locationProperty).getFileSystem(conf),
+          new Path(defaultTableLocation).getFileSystem(conf));
+    } catch (IOException e) {
+      LOG.debug("Unable to get FileSystem for path {} and {}", locationProperty, defaultTableLocation);
+      return false;
+    }
+  }
 
   @Override
   public void validateSinkDesc(FileSinkDesc sinkDesc) throws SemanticException {
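
The heart of getPathForAuth above is a prefix substitution guarded by a
same-FileSystem check. A self-contained sketch of just that substitution step
(hypothetical paths; the real method additionally URL-encodes the result and
qualifies the default path when the location carries a scheme):

    // Sketch: the replaceFirst-based masking in isolation. Note that
    // String.replaceFirst treats its first argument as a regex, here as
    // in the method above.
    public class MaskSketch {
      static final String TABLE_DEFAULT_LOCATION = "TABLE_DEFAULT_LOCATION";

      static String mask(String metadataPath, String defaultTableLocation) {
        // Swap the leading default-location prefix for a stable placeholder;
        // paths outside the default location fall through unchanged.
        return metadataPath.replaceFirst(defaultTableLocation, TABLE_DEFAULT_LOCATION);
      }

      public static void main(String[] args) {
        String metadata = "/warehouse/ext/hive/target/metadata/00000-abc.metadata.json";
        // Prints: TABLE_DEFAULT_LOCATION/metadata/00000-abc.metadata.json
        System.out.println(mask(metadata, "/warehouse/ext/hive/target"));
      }
    }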
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index 696acd8903b..48da53e2b68 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -95,6 +95,7 @@ import org.junit.runners.Parameterized;
 import org.mockito.ArgumentCaptor;
 import org.mockito.Mockito;
 
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION;
 import static org.apache.iceberg.TableProperties.GC_ENABLED;
 import static org.apache.iceberg.types.Types.NestedField.optional;
 import static org.junit.runners.Parameterized.Parameter;
@@ -1484,31 +1485,59 @@ public class TestHiveIcebergStorageHandlerNoScan {
   }
 
   @Test
-  public void testAuthzURI() throws TException, InterruptedException, URISyntaxException {
+  public void testAuthzURIMasked() throws TException, URISyntaxException, InterruptedException {
+    testAuthzURI(true);
+  }
+
+  @Test
+  public void testAuthzURIUnmasked() throws TException, URISyntaxException, InterruptedException {
+    testAuthzURI(false);
+  }
+
+  public void testAuthzURI(boolean masked) throws TException, InterruptedException, URISyntaxException {
     TableIdentifier target = TableIdentifier.of("default", "target");
     Table table = testTables.createTable(shell, target.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
         PartitionSpec.unpartitioned(), FileFormat.PARQUET, ImmutableList.of());
     org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable(target);
 
     HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
+    shell.getHiveConf().setBoolean(HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, masked);
     storageHandler.setConf(shell.getHiveConf());
     URI uriForAuth = storageHandler.getURIForAuth(hmsTable);
 
+    String metadataLocation =
+        storageHandler.getPathForAuth(((BaseTable) table).operations().current().metadataFileLocation(),
+            hmsTable.getSd().getLocation());
+
+    if (masked) {
+      Assert.assertTrue(metadataLocation.startsWith(HiveIcebergStorageHandler.TABLE_DEFAULT_LOCATION));
+    }
+
     Assert.assertEquals("iceberg://" +
             HiveIcebergStorageHandler.encodeString(target.namespace().toString()) + "/" +
             HiveIcebergStorageHandler.encodeString(target.name()) + "?snapshot=" +
             HiveIcebergStorageHandler.encodeString(
-            URI.create(((BaseTable) table).operations().current().metadataFileLocation()).getPath()),
+            URI.create(metadataLocation).getPath()),
         uriForAuth.toString());
 
     Assert.assertEquals("iceberg://" + target.namespace() + "/" + target.name() + "?snapshot=" +
-        URI.create(((BaseTable) table).operations().current().metadataFileLocation()).getPath(),
+        URI.create(metadataLocation).getPath(),
         HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(uriForAuth.toString()));
 
   }
 
   @Test
-  public void testAuthzURIWithAuthEnabledWithMetadataLocation() throws HiveException {
+  public void testAuthzURIWithAuthEnabledWithMetadataLocationMasked() throws HiveException {
+    testAuthzURIWithAuthEnabledWithMetadataLocation(true);
+  }
+
+  @Test
+  public void testAuthzURIWithAuthEnabledWithMetadataLocationUnmasked() throws HiveException {
+    testAuthzURIWithAuthEnabledWithMetadataLocation(false);
+  }
+
+  public void testAuthzURIWithAuthEnabledWithMetadataLocation(boolean masked) throws HiveException {
+    shell.getHiveConf().setBoolean(HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, masked);
     shell.setHiveSessionValue("hive.security.authorization.enabled", true);
     shell.setHiveSessionValue("hive.security.authorization.manager",
         "org.apache.iceberg.mr.hive.CustomTestHiveAuthorizerFactory");
@@ -1540,7 +1569,21 @@ public class TestHiveIcebergStorageHandlerNoScan {
   }
 
   @Test
-  public void testAuthzURIWithAuthEnabledAndMockCommandAuthorizer() throws HiveException {
+  public void testAuthzURIWithAuthEnabledAndMockCommandAuthorizerMasked()
+      throws HiveException, TException, InterruptedException {
+    Assume.assumeTrue(testTableType.equals(TestTables.TestTableType.HIVE_CATALOG));
+    testAuthzURIWithAuthEnabledAndMockCommandAuthorizer(true);
+  }
+
+  @Test
+  public void testAuthzURIWithAuthEnabledAndMockCommandAuthorizerUnmasked()
+      throws HiveException, TException, InterruptedException {
+    testAuthzURIWithAuthEnabledAndMockCommandAuthorizer(false);
+  }
+
+  public void testAuthzURIWithAuthEnabledAndMockCommandAuthorizer(boolean masked)
+      throws HiveException, TException, InterruptedException {
+    shell.getHiveConf().setBoolean(HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, masked);
     shell.setHiveSessionValue("hive.security.authorization.enabled", true);
     shell.setHiveSessionValue("hive.security.authorization.manager",
         "org.apache.iceberg.mr.hive.CustomTestHiveAuthorizerFactory");
@@ -1554,8 +1597,18 @@ public class TestHiveIcebergStorageHandlerNoScan {
     Optional<HivePrivilegeObject> hivePrivObject = outputHObjsCaptor.getValue().stream()
         .filter(hpo -> hpo.getType().equals(HivePrivilegeObject.HivePrivilegeObjectType.STORAGEHANDLER_URI)).findAny();
     if (hivePrivObject.isPresent()) {
+      org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable(target);
+      HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
+      storageHandler.setConf(shell.getHiveConf());
+      String metadataLocation = HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(
+          storageHandler.getPathForAuth(((BaseTable) table).operations().current().metadataFileLocation(),
+              hmsTable.getSd().getLocation()));
+
+      if (masked) {
+        Assert.assertTrue(metadataLocation.startsWith(HiveIcebergStorageHandler.TABLE_DEFAULT_LOCATION));
+      }
       Assert.assertEquals("iceberg://" + target.namespace() + "/" + target.name() + "?snapshot=" +
-              new Path(((BaseTable) table).operations().current().metadataFileLocation()).getParent().toUri()
+              new Path(metadataLocation).getParent().toUri()
                   .getPath() +
               "/dummy.metadata.json",
           HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(hivePrivObject.get().getObjectName()));
@@ -1565,7 +1618,18 @@ public class TestHiveIcebergStorageHandlerNoScan {
   }
 
   @Test
-  public void testAuthzURIWithAuthEnabled() throws TException, InterruptedException, URISyntaxException {
+  public void testAuthzURIWithAuthEnabledMasked() throws TException, URISyntaxException, InterruptedException {
+    Assume.assumeTrue(testTableType.equals(TestTables.TestTableType.HIVE_CATALOG));
+    testAuthzURIWithAuthEnabled(true);
+  }
+
+  @Test
+  public void testAuthzURIWithAuthEnabledUnmasked() throws TException, URISyntaxException, InterruptedException {
+    testAuthzURIWithAuthEnabled(false);
+  }
+
+  public void testAuthzURIWithAuthEnabled(boolean masked) throws TException, InterruptedException, URISyntaxException {
+    shell.getHiveConf().setBoolean(HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, masked);
     shell.setHiveSessionValue("hive.security.authorization.enabled", true);
     TableIdentifier target = TableIdentifier.of("default", "target");
     Table table = testTables.createTable(shell, target.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
@@ -1575,9 +1639,15 @@ public class TestHiveIcebergStorageHandlerNoScan {
     HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
     storageHandler.setConf(shell.getHiveConf());
     URI uriForAuth = storageHandler.getURIForAuth(hmsTable);
+    String metadataLocation =
+        storageHandler.getPathForAuth(((BaseTable) table).operations().current().metadataFileLocation(),
+            hmsTable.getSd().getLocation());
+
+    if (masked) {
+      Assert.assertTrue(metadataLocation.startsWith(HiveIcebergStorageHandler.TABLE_DEFAULT_LOCATION));
+    }
     Assert.assertEquals("iceberg://" + target.namespace() + "/" + target.name() + "?snapshot=" +
-        URI.create(((BaseTable) table).operations().current()
-        .metadataFileLocation()).getPath(),
+        URI.create(metadataLocation).getPath(),
         HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(uriForAuth.toString()));
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 7d646e8b7ab..9fe61c0bdc3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -24,6 +24,7 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Collections;
 import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hive.common.TableName;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
 import org.apache.hadoop.hive.common.classification.InterfaceStability;
 import org.apache.hadoop.hive.common.type.SnapshotContext;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 9fa27638eb0..8cdba7fd8df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -25,6 +25,7 @@ import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONCONV
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEARCHIVEENABLED;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
+import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DEFAULT_TABLE_LOCATION;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
@@ -13800,6 +13801,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       addDbAndTabToOutputs(qualifiedTabName,
               TableType.EXTERNAL_TABLE, isTemporaryTable, retValue, storageFormat);
     }
+
+    if (isIcebergTable(retValue)) {
+      SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.DEFAULT_TABLE_LOCATION,
+          getDefaultLocation(qualifiedTabName[0], qualifiedTabName[1], true));
+    }
     return retValue;
   }
 
@@ -14211,13 +14217,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       if (location != null) {
         tblLocation = location;
       } else {
-        try {
-          Warehouse wh = new Warehouse(conf);
-          tblLocation = wh.getDefaultTablePath(db.getDatabase(qualifiedTabName.getDb()), qualifiedTabName.getTable(),
-                  isExt).toUri().getPath();
-        } catch (MetaException | HiveException e) {
-          throw new SemanticException(e);
-        }
+        tblLocation = getDefaultLocation(qualifiedTabName.getDb(), qualifiedTabName.getTable(), isExt);
       }
       try {
         HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(conf, storageFormat.getStorageHandler());
@@ -14376,6 +14376,17 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return null;
   }
 
+  private String getDefaultLocation(String dbName, String tableName, boolean isExt) throws SemanticException {
+    String tblLocation;
+    try {
+      Warehouse wh = new Warehouse(conf);
+      tblLocation = wh.getDefaultTablePath(db.getDatabase(dbName), tableName, isExt).toUri().getPath();
+    } catch (MetaException | HiveException e) {
+      throw new SemanticException(e);
+    }
+    return tblLocation;
+  }
+
   private static boolean isIcebergTable(Map<String, String> tblProps) {
     return AlterTableConvertOperation.ConversionFormats.ICEBERG.properties().get(META_TABLE_STORAGE)
         .equalsIgnoreCase(tblProps.get(META_TABLE_STORAGE));
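
The SemanticAnalyzer change above records the would-be default table path at
CREATE TABLE time, before the table exists in HMS, so the storage handler has a
default location to mask against. A hedged sketch of what that captured value
looks like (assuming conf and db as in SemanticAnalyzer; the resolved path is
illustrative):

    // Sketch: resolving a default external-table path via Warehouse.
    Warehouse wh = new Warehouse(conf);
    Path defaultPath = wh.getDefaultTablePath(db.getDatabase("default"), "target", true);
    // e.g. defaultPath.toUri().getPath() -> "/warehouse/tablespace/external/hive/target"
    String defaultLocation = defaultPath.toUri().getPath();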
diff --git 
a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
 
b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
index f5a102ab964..776683b882f 100644
--- 
a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
+++ 
b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
@@ -97,4 +97,6 @@ package org.apache.hadoop.hive.metastore.api;
 
   public static final java.lang.String EXPECTED_PARAMETER_VALUE = "expected_parameter_value";
 
+  public static final java.lang.String DEFAULT_TABLE_LOCATION = "defaultLocation";
+
 }
