This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 33903b825bb HIVE-27714: Iceberg: metadata location overrides can cause data breach - handling default locations. (#4880). (Ayush Saxena, reviewed by Denys Kuzmenko)
33903b825bb is described below
commit 33903b825bb949d2a4c3839688817ac5ebe3fbdb
Author: Ayush Saxena <[email protected]>
AuthorDate: Fri Dec 1 20:38:52 2023 +0530
    HIVE-27714: Iceberg: metadata location overrides can cause data breach - handling default locations. (#4880). (Ayush Saxena, reviewed by Denys Kuzmenko)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 48 ++++++++++--
.../hive/TestHiveIcebergStorageHandlerNoScan.java | 88 +++++++++++++++++++---
.../hive/ql/metadata/HiveStorageHandler.java | 1 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 25 ++++--
.../metastore/api/hive_metastoreConstants.java | 2 +
6 files changed, 146 insertions(+), 22 deletions(-)
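
In mechanical terms: getURIForAuth builds an authorization URI of the form
iceberg://<db>/<table>?snapshot=<metadata path>. With the new flag
hive.iceberg.mask.default.location enabled, and when the metadata path sits on the
same FileSystem as the table's default warehouse location, the default-location
prefix is replaced with the fixed token TABLE_DEFAULT_LOCATION before the path is
encoded into the URI. For illustration only (warehouse path invented; URI shape
taken from the test assertions below):

    unmasked:  iceberg://default/target?snapshot=/warehouse/ext/default.db/target/metadata/v1.metadata.json
    masked:    iceberg://default/target?snapshot=TABLE_DEFAULT_LOCATION/metadata/v1.metadata.json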
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 027bab6eb53..ad807386f36 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2226,6 +2226,10 @@ public class HiveConf extends Configuration {
     HIVE_ICEBERG_EXPIRE_SNAPSHOT_NUMTHREADS("hive.iceberg.expire.snapshot.numthreads", 4,
         "The number of threads to be used for deleting files during expire snapshot. If set to 0 or below it uses the" +
         " default DirectExecutorService"),
+
+    HIVE_ICEBERG_MASK_DEFAULT_LOCATION("hive.iceberg.mask.default.location", false,
+        "If this is set to true, the URI used for authorization will have the default table location masked with TABLE_DEFAULT_LOCATION"),
+
     HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
         "If this is set the header for RCFiles will simply be RCF. If this is not\n" +
         "set the header will be that borrowed from sequence files, e.g. SEQ- followed\n" +
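
For reference, a minimal sketch of enabling the new flag programmatically, mirroring
the pattern the updated tests use (the ConfVars entry exists only with this patch
applied; the standalone class is invented for illustration). In a Hive session the
equivalent would be SET hive.iceberg.mask.default.location=true;

    import org.apache.hadoop.hive.conf.HiveConf;

    public class MaskFlagSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Set by varname on the underlying Configuration, as the tests below do.
        conf.setBoolean(HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, true);
        System.out.println(conf.getBoolean(
            HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, false));
      }
    }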
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 9cfb210eec3..5f4f97b9f72 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -48,6 +48,7 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.common.type.SnapshotContext;
@@ -214,6 +215,8 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
public static final String MERGE_ON_READ = "merge-on-read";
public static final String STATS = "/stats/snap-";
+ public static final String TABLE_DEFAULT_LOCATION = "TABLE_DEFAULT_LOCATION";
+
/**
* Function template for producing a custom sort expression function:
    * Takes the source column index and the bucket count to create a function where Iceberg bucket UDF is used to build
@@ -520,11 +523,11 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
writer.finish();
return true;
} catch (IOException e) {
- LOG.warn("Unable to write stats to puffin file", e.getMessage());
+ LOG.warn("Unable to write stats to puffin file {}", e.getMessage());
return false;
}
} catch (InvalidObjectException | IOException e) {
- LOG.warn("Unable to invalidate or merge stats: ", e.getMessage());
+ LOG.warn("Unable to invalidate or merge stats: {}", e.getMessage());
return false;
}
}
@@ -1053,19 +1056,19 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     Optional<String> metadataLocation = SessionStateUtil.getProperty(conf,
         BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
     if (metadataLocation.isPresent()) {
-      authURI.append(encodeString(metadataLocation.get()));
+      authURI.append(getPathForAuth(metadataLocation.get()));
     } else {
       Optional<String> locationProperty = SessionStateUtil.getProperty(conf,
           hive_metastoreConstants.META_TABLE_LOCATION);
       if (locationProperty.isPresent()) {
         // this property is set during the create operation before the hive table was created
         // we are returning a dummy iceberg metadata file
-        authURI.append(encodeString(URI.create(locationProperty.get()).getPath()))
+        authURI.append(getPathForAuth(locationProperty.get()))
             .append(encodeString("/metadata/dummy.metadata.json"));
       } else {
         Table table = IcebergTableUtil.getTable(conf, hmsTable);
-        authURI.append(
-            encodeString(URI.create(((BaseTable) table).operations().current().metadataFileLocation()).getPath()));
+        authURI.append(getPathForAuth(((BaseTable) table).operations().current().metadataFileLocation(),
+            hmsTable.getSd().getLocation()));
       }
     }
     LOG.debug("Iceberg storage handler authorization URI {}", authURI);
@@ -1080,6 +1083,39 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.encode(rawString);
   }
 
+  String getPathForAuth(String locationProperty) {
+    return getPathForAuth(locationProperty,
+        SessionStateUtil.getProperty(conf, hive_metastoreConstants.DEFAULT_TABLE_LOCATION).orElse(null));
+  }
+
+  String getPathForAuth(String locationProperty, String defaultTableLocation) {
+    boolean maskDefaultLocation = conf.getBoolean(HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname,
+        HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION.defaultBoolVal);
+    String location = URI.create(locationProperty).getPath();
+    if (!maskDefaultLocation || defaultTableLocation == null ||
+        !arePathsInSameFs(locationProperty, defaultTableLocation)) {
+      return encodeString(location);
+    }
+    try {
+      Path locationPath = new Path(location);
+      Path defaultLocationPath = locationPath.toUri().getScheme() != null ?
+          FileUtils.makeQualified(new Path(defaultTableLocation), conf) :
+          Path.getPathWithoutSchemeAndAuthority(new Path(defaultTableLocation));
+      return encodeString(location.replaceFirst(defaultLocationPath.toString(), TABLE_DEFAULT_LOCATION));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private boolean arePathsInSameFs(String locationProperty, String defaultTableLocation) {
+    try {
+      return FileUtils.equalsFileSystem(new Path(locationProperty).getFileSystem(conf),
+          new Path(defaultTableLocation).getFileSystem(conf));
+    } catch (IOException e) {
+      LOG.debug("Unable to get FileSystem for path {} and {}", locationProperty, defaultTableLocation);
+      return false;
+    }
+  }
+
   @Override
   public void validateSinkDesc(FileSinkDesc sinkDesc) throws SemanticException {
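
The heart of the change is the prefix substitution in getPathForAuth above. A
self-contained sketch of just that rule, with invented paths (the real method
additionally consults hive.iceberg.mask.default.location, verifies via
arePathsInSameFs that both paths share a FileSystem, qualifies the default path,
and URL-encodes the result):

    public class MaskSketch {
      static final String TABLE_DEFAULT_LOCATION = "TABLE_DEFAULT_LOCATION";

      // Replace the table's default-location prefix with the fixed token.
      static String mask(String metadataPath, String defaultTableLocation) {
        return metadataPath.replaceFirst(defaultTableLocation, TABLE_DEFAULT_LOCATION);
      }

      public static void main(String[] args) {
        // Hypothetical warehouse layout, for illustration only.
        String defaultLoc = "/warehouse/tablespace/external/hive/default.db/target";
        String metadata = defaultLoc + "/metadata/00001-abc.metadata.json";
        // Prints: TABLE_DEFAULT_LOCATION/metadata/00001-abc.metadata.json
        System.out.println(mask(metadata, defaultLoc));
      }
    }

Note that, as in the patch, replaceFirst treats the default location as a regex; for
typical warehouse paths this is benign, since '.' in the pattern still matches a
literal dot.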
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index 696acd8903b..48da53e2b68 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -95,6 +95,7 @@ import org.junit.runners.Parameterized;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ICEBERG_MASK_DEFAULT_LOCATION;
import static org.apache.iceberg.TableProperties.GC_ENABLED;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.junit.runners.Parameterized.Parameter;
@@ -1484,31 +1485,59 @@ public class TestHiveIcebergStorageHandlerNoScan {
}
@Test
-  public void testAuthzURI() throws TException, InterruptedException, URISyntaxException {
+  public void testAuthzURIMasked() throws TException, URISyntaxException, InterruptedException {
+    testAuthzURI(true);
+  }
+
+  @Test
+  public void testAuthzURIUnmasked() throws TException, URISyntaxException, InterruptedException {
+    testAuthzURI(false);
+  }
+
+  public void testAuthzURI(boolean masked) throws TException, InterruptedException, URISyntaxException {
     TableIdentifier target = TableIdentifier.of("default", "target");
     Table table = testTables.createTable(shell, target.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
         PartitionSpec.unpartitioned(), FileFormat.PARQUET, ImmutableList.of());
     org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable(target);
     HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
+    shell.getHiveConf().setBoolean(HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, masked);
     storageHandler.setConf(shell.getHiveConf());
     URI uriForAuth = storageHandler.getURIForAuth(hmsTable);
+    String metadataLocation =
+        storageHandler.getPathForAuth(((BaseTable) table).operations().current().metadataFileLocation(),
+            hmsTable.getSd().getLocation());
+
+    if (masked) {
+      Assert.assertTrue(metadataLocation.startsWith(HiveIcebergStorageHandler.TABLE_DEFAULT_LOCATION));
+    }
+
     Assert.assertEquals("iceberg://" + HiveIcebergStorageHandler.encodeString(target.namespace().toString()) + "/" +
         HiveIcebergStorageHandler.encodeString(target.name()) + "?snapshot=" +
         HiveIcebergStorageHandler.encodeString(
-            URI.create(((BaseTable) table).operations().current().metadataFileLocation()).getPath()),
+            URI.create(metadataLocation).getPath()),
         uriForAuth.toString());
     Assert.assertEquals("iceberg://" + target.namespace() + "/" + target.name() + "?snapshot=" +
-        URI.create(((BaseTable) table).operations().current().metadataFileLocation()).getPath(),
+        URI.create(metadataLocation).getPath(),
         HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(uriForAuth.toString()));
   }
@Test
-  public void testAuthzURIWithAuthEnabledWithMetadataLocation() throws HiveException {
+  public void testAuthzURIWithAuthEnabledWithMetadataLocationMasked() throws HiveException {
+    testAuthzURIWithAuthEnabledWithMetadataLocation(true);
+  }
+
+  @Test
+  public void testAuthzURIWithAuthEnabledWithMetadataLocationUnmasked() throws HiveException {
+    testAuthzURIWithAuthEnabledWithMetadataLocation(false);
+  }
+
+  public void testAuthzURIWithAuthEnabledWithMetadataLocation(boolean masked) throws HiveException {
+    shell.getHiveConf().setBoolean(HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, masked);
     shell.setHiveSessionValue("hive.security.authorization.enabled", true);
     shell.setHiveSessionValue("hive.security.authorization.manager",
         "org.apache.iceberg.mr.hive.CustomTestHiveAuthorizerFactory");
@@ -1540,7 +1569,21 @@ public class TestHiveIcebergStorageHandlerNoScan {
}
@Test
-  public void testAuthzURIWithAuthEnabledAndMockCommandAuthorizer() throws HiveException {
+  public void testAuthzURIWithAuthEnabledAndMockCommandAuthorizerMasked()
+      throws HiveException, TException, InterruptedException {
+    Assume.assumeTrue(testTableType.equals(TestTables.TestTableType.HIVE_CATALOG));
+    testAuthzURIWithAuthEnabledAndMockCommandAuthorizer(true);
+  }
+
+  @Test
+  public void testAuthzURIWithAuthEnabledAndMockCommandAuthorizerUnmasked()
+      throws HiveException, TException, InterruptedException {
+    testAuthzURIWithAuthEnabledAndMockCommandAuthorizer(false);
+  }
+
+  public void testAuthzURIWithAuthEnabledAndMockCommandAuthorizer(boolean masked)
+      throws HiveException, TException, InterruptedException {
+    shell.getHiveConf().setBoolean(HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, masked);
     shell.setHiveSessionValue("hive.security.authorization.enabled", true);
     shell.setHiveSessionValue("hive.security.authorization.manager",
         "org.apache.iceberg.mr.hive.CustomTestHiveAuthorizerFactory");
@@ -1554,8 +1597,18 @@ public class TestHiveIcebergStorageHandlerNoScan {
     Optional<HivePrivilegeObject> hivePrivObject = outputHObjsCaptor.getValue().stream()
         .filter(hpo -> hpo.getType().equals(HivePrivilegeObject.HivePrivilegeObjectType.STORAGEHANDLER_URI)).findAny();
     if (hivePrivObject.isPresent()) {
+      org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable(target);
+      HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
+      storageHandler.setConf(shell.getHiveConf());
+      String metadataLocation = HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(
+          storageHandler.getPathForAuth(((BaseTable) table).operations().current().metadataFileLocation(),
+              hmsTable.getSd().getLocation()));
+
+      if (masked) {
+        Assert.assertTrue(metadataLocation.startsWith(HiveIcebergStorageHandler.TABLE_DEFAULT_LOCATION));
+      }
       Assert.assertEquals("iceberg://" + target.namespace() + "/" + target.name() + "?snapshot=" +
-              new Path(((BaseTable) table).operations().current().metadataFileLocation()).getParent().toUri()
+              new Path(metadataLocation).getParent().toUri()
                   .getPath() + "/dummy.metadata.json",
           HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(hivePrivObject.get().getObjectName()));
@@ -1565,7 +1618,18 @@ public class TestHiveIcebergStorageHandlerNoScan {
}
@Test
-  public void testAuthzURIWithAuthEnabled() throws TException, InterruptedException, URISyntaxException {
+  public void testAuthzURIWithAuthEnabledMasked() throws TException, URISyntaxException, InterruptedException {
+    Assume.assumeTrue(testTableType.equals(TestTables.TestTableType.HIVE_CATALOG));
+    testAuthzURIWithAuthEnabled(true);
+  }
+
+  @Test
+  public void testAuthzURIWithAuthEnabledUnmasked() throws TException, URISyntaxException, InterruptedException {
+    testAuthzURIWithAuthEnabled(false);
+  }
+
+  public void testAuthzURIWithAuthEnabled(boolean masked) throws TException, InterruptedException, URISyntaxException {
+    shell.getHiveConf().setBoolean(HIVE_ICEBERG_MASK_DEFAULT_LOCATION.varname, masked);
     shell.setHiveSessionValue("hive.security.authorization.enabled", true);
     TableIdentifier target = TableIdentifier.of("default", "target");
     Table table = testTables.createTable(shell, target.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
@@ -1575,9 +1639,15 @@ public class TestHiveIcebergStorageHandlerNoScan {
     HiveIcebergStorageHandler storageHandler = new HiveIcebergStorageHandler();
     storageHandler.setConf(shell.getHiveConf());
     URI uriForAuth = storageHandler.getURIForAuth(hmsTable);
+    String metadataLocation =
+        storageHandler.getPathForAuth(((BaseTable) table).operations().current().metadataFileLocation(),
+            hmsTable.getSd().getLocation());
+
+    if (masked) {
+      Assert.assertTrue(metadataLocation.startsWith(HiveIcebergStorageHandler.TABLE_DEFAULT_LOCATION));
+    }
     Assert.assertEquals("iceberg://" + target.namespace() + "/" + target.name() + "?snapshot=" +
-        URI.create(((BaseTable) table).operations().current()
-            .metadataFileLocation()).getPath(),
+        URI.create(metadataLocation).getPath(),
         HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(uriForAuth.toString()));
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 7d646e8b7ab..9fe61c0bdc3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -24,6 +24,7 @@ import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collections;
import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
import org.apache.hadoop.hive.common.type.SnapshotContext;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 9fa27638eb0..8cdba7fd8df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -25,6 +25,7 @@ import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONCONV
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEARCHIVEENABLED;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
+import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.DEFAULT_TABLE_LOCATION;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS;
@@ -13800,6 +13801,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         addDbAndTabToOutputs(qualifiedTabName,
             TableType.EXTERNAL_TABLE, isTemporaryTable, retValue, storageFormat);
       }
+
+      if (isIcebergTable(retValue)) {
+        SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.DEFAULT_TABLE_LOCATION,
+            getDefaultLocation(qualifiedTabName[0], qualifiedTabName[1], true));
+      }
return retValue;
}
@@ -14211,13 +14217,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       if (location != null) {
         tblLocation = location;
       } else {
-        try {
-          Warehouse wh = new Warehouse(conf);
-          tblLocation = wh.getDefaultTablePath(db.getDatabase(qualifiedTabName.getDb()), qualifiedTabName.getTable(),
-              isExt).toUri().getPath();
-        } catch (MetaException | HiveException e) {
-          throw new SemanticException(e);
-        }
+        tblLocation = getDefaultLocation(qualifiedTabName.getDb(), qualifiedTabName.getTable(), isExt);
       }
try {
       HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(conf, storageFormat.getStorageHandler());
@@ -14376,6 +14376,17 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return null;
   }
 
+  private String getDefaultLocation(String dbName, String tableName, boolean isExt) throws SemanticException {
+    String tblLocation;
+    try {
+      Warehouse wh = new Warehouse(conf);
+      tblLocation = wh.getDefaultTablePath(db.getDatabase(dbName), tableName, isExt).toUri().getPath();
+    } catch (MetaException | HiveException e) {
+      throw new SemanticException(e);
+    }
+    return tblLocation;
+  }
+
   private static boolean isIcebergTable(Map<String, String> tblProps) {
     return AlterTableConvertOperation.ConversionFormats.ICEBERG.properties().get(META_TABLE_STORAGE)
         .equalsIgnoreCase(tblProps.get(META_TABLE_STORAGE));
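
Tying the two files together: at CREATE TABLE analysis time SemanticAnalyzer now
computes the default table path and stashes it in session state under
hive_metastoreConstants.DEFAULT_TABLE_LOCATION ("defaultLocation"), and
HiveIcebergStorageHandler.getPathForAuth(String) later reads it back from the same
key. A toy model of that handoff, with a plain Map standing in for SessionStateUtil
(the real calls are addResourceOrThrow and getProperty, as shown in the hunks above;
the path is invented):

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Optional;

    public class SessionHandoffSketch {
      static final String DEFAULT_TABLE_LOCATION = "defaultLocation";
      static final Map<String, String> session = new HashMap<>();

      public static void main(String[] args) {
        // Analysis time: SemanticAnalyzer stores the computed default path.
        session.put(DEFAULT_TABLE_LOCATION, "/warehouse/ext/default.db/target");
        // Authorization time: the storage handler reads it back, absent-safe.
        String defaultLoc = Optional.ofNullable(session.get(DEFAULT_TABLE_LOCATION)).orElse(null);
        System.out.println(defaultLoc);
      }
    }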
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
index f5a102ab964..776683b882f 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
@@ -97,4 +97,6 @@ package org.apache.hadoop.hive.metastore.api;
 
   public static final java.lang.String EXPECTED_PARAMETER_VALUE = "expected_parameter_value";
 
+  public static final java.lang.String DEFAULT_TABLE_LOCATION = "defaultLocation";
+
 }