This is an automated email from the ASF dual-hosted git repository.
saihemanth-cloudera pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ca64f08a8e4 HIVE-29622: Improve metastore direct-SQL partition lookup
robustness (#6509)
ca64f08a8e4 is described below
commit ca64f08a8e43db9845b47d5fa2e96f7fdea7288e
Author: Sai Hemanth Gantasala
<[email protected]>
AuthorDate: Wed Jun 3 12:43:52 2026 -0700
HIVE-29622: Improve metastore direct-SQL partition lookup robustness (#6509)
---
.../metastore/directsql/DirectSqlUpdatePart.java | 55 ++++++++--------
.../metastore/directsql/MetaStoreDirectSql.java | 17 +++--
.../hadoop/hive/metastore/TestObjectStore.java | 75 ++++++++++++++++++++++
3 files changed, 116 insertions(+), 31 deletions(-)
diff --git
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/DirectSqlUpdatePart.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/DirectSqlUpdatePart.java
index 4a7f831d8d0..94926c01564 100644
---
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/DirectSqlUpdatePart.java
+++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/DirectSqlUpdatePart.java
@@ -77,6 +77,7 @@
import static
org.apache.hadoop.hive.metastore.directsql.MetastoreDirectSqlUtils.extractSqlInt;
import static
org.apache.hadoop.hive.metastore.directsql.MetastoreDirectSqlUtils.extractSqlLong;
import static
org.apache.hadoop.hive.metastore.directsql.MetastoreDirectSqlUtils.getModelIdentity;
+import static
org.apache.hadoop.hive.metastore.directsql.MetastoreDirectSqlUtils.makeParams;
import static
org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.getPartValsFromName;
/**
@@ -98,10 +99,6 @@ class DirectSqlUpdatePart extends DirectSqlBase {
sqlGenerator = new SQLGenerator(dbType, conf);
}
- static String quoteString(String input) {
- return "'" + input + "'";
- }
-
private void populateInsertUpdateMap(Map<PartitionInfo, ColumnStatistics>
statsPartInfoMap,
Map<PartColNameInfo,
MPartitionColumnStatistics> updateMap,
Map<PartColNameInfo,
MPartitionColumnStatistics>insertMap,
@@ -412,35 +409,39 @@ private Map<String, Map<String, String>>
updatePartitionParamTable(Connection db
private Map<PartitionInfo, ColumnStatistics> getPartitionInfo(Connection
dbConn, long tblId,
Map<String,
ColumnStatistics> partColStatsMap)
- throws SQLException, MetaException {
- List<String> queries = new ArrayList<>();
- StringBuilder prefix = new StringBuilder();
- StringBuilder suffix = new StringBuilder();
+ throws MetaException {
Map<PartitionInfo, ColumnStatistics> partitionInfoMap = new HashMap<>();
+ List<String> partNames = new ArrayList<>(partColStatsMap.keySet());
+ if (partNames.isEmpty()) {
+ return partitionInfoMap;
+ }
- List<String> partKeys = partColStatsMap.keySet().stream().map(
- e -> quoteString(e)).collect(Collectors.toList()
- );
-
- prefix.append("select \"PART_ID\", \"WRITE_ID\", \"PART_NAME\" from
\"PARTITIONS\" where ");
- suffix.append(" and \"TBL_ID\" = " + tblId);
- TxnUtils.buildQueryWithINClauseStrings(conf, queries, prefix, suffix,
- partKeys, "\"PART_NAME\"", true, false);
-
- try (Statement statement = dbConn.createStatement()) {
- for (String query : queries) {
+ Batchable.runBatched(maxBatchSize, partNames, new Batchable<String,
Void>() {
+ @Override
+ public List<Void> run(List<String> input) throws Exception {
+ String placeholders = makeParams(input.size());
+ String query = "select \"PART_ID\", \"WRITE_ID\", \"PART_NAME\" from
\"PARTITIONS\" where "
+ + "\"PART_NAME\" in (" + placeholders + ") and \"TBL_ID\" = ?";
// Select for update makes sure that the partitions are not modified
while the stats are getting updated.
query = sqlGenerator.addForUpdateClause(query);
LOG.debug("Execute query: " + query);
- try (ResultSet rs = statement.executeQuery(query)) {
- while (rs.next()) {
- PartitionInfo partitionInfo = new PartitionInfo(rs.getLong(1),
- rs.getLong(2), rs.getString(3));
- partitionInfoMap.put(partitionInfo,
partColStatsMap.get(rs.getString(3)));
+ try (PreparedStatement ps = dbConn.prepareStatement(query)) {
+ int paramIndex = 1;
+ for (String partName : input) {
+ ps.setString(paramIndex++, partName);
+ }
+ ps.setLong(paramIndex, tblId);
+ try (ResultSet rs = ps.executeQuery()) {
+ while (rs.next()) {
+ String partName = rs.getString(3);
+ PartitionInfo partitionInfo = new PartitionInfo(rs.getLong(1),
rs.getLong(2), partName);
+ partitionInfoMap.put(partitionInfo,
partColStatsMap.get(partName));
+ }
}
}
+ return Collections.emptyList();
}
- }
+ });
return partitionInfoMap;
}
@@ -473,6 +474,10 @@ public Map<String, Map<String, String>>
updatePartitionColumnStatistics(Map<Stri
Map<PartitionInfo, ColumnStatistics> partitionInfoMap =
getPartitionInfo(dbConn, tbl.getId(), partColStatsMap);
+ if (partitionInfoMap.isEmpty()) {
+ return Collections.emptyMap();
+ }
+
result = updatePartitionParamTable(dbConn, partitionInfoMap,
validWriteIds,
writeId, TxnUtils.isAcidTable(tbl));
diff --git
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetaStoreDirectSql.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetaStoreDirectSql.java
index 3a317104ad8..713f3ff4e33 100644
---
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetaStoreDirectSql.java
+++
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/directsql/MetaStoreDirectSql.java
@@ -738,7 +738,7 @@ public List<Partition> getPartitionsViaPartNames(final
String catName, final Str
return Batchable.runBatched(batchSize, partNames, new Batchable<String,
Partition>() {
@Override
public List<Partition> run(List<String> input) throws MetaException {
- return getPartitionsByNames(catName, dbName, tblName, partNames,
false, args);
+ return getPartitionsByNames(catName, dbName, tblName, input, false,
args);
}
});
}
@@ -1028,9 +1028,7 @@ private List<Partition> getPartitionsByNames(String
catName, String dbName,
throws MetaException {
// Get most of the fields for the partNames provided.
// Assume db and table names are the same for all partition, as provided
in arguments.
- String quotedPartNames = partNameList.stream()
- .map(DirectSqlUpdatePart::quoteString)
- .collect(Collectors.joining(","));
+ String partNameParams = makeParams(partNameList.size());
String queryText =
"select " + PARTITIONS + ".\"PART_ID\"," + SDS + ".\"SD_ID\"," + SDS +
".\"CD_ID\","
@@ -1043,11 +1041,18 @@ private List<Partition> getPartitionsByNames(String
catName, String dbName,
+ " left outer join " + SERDES + " on " + SDS + ".\"SERDE_ID\" = " +
SERDES + ".\"SERDE_ID\" "
+ " inner join " + TBLS + " on " + TBLS + ".\"TBL_ID\" = " +
PARTITIONS + ".\"TBL_ID\" "
+ " inner join " + DBS + " on " + DBS + ".\"DB_ID\" = " + TBLS +
".\"DB_ID\" "
- + " where \"PART_NAME\" in (" + quotedPartNames + ") "
+ + " where " + PARTITIONS + ".\"PART_NAME\" in (" + partNameParams + ")
"
+ " and " + TBLS + ".\"TBL_NAME\" = ? and " + DBS + ".\"NAME\" = ? and
" + DBS
+ ".\"CTLG_NAME\" = ? order by \"PART_NAME\" asc";
- Object[] params = new Object[]{tblName, dbName, catName};
+ Object[] params = new Object[partNameList.size() + 3];
+ int i = 0;
+ for (String partName : partNameList) {
+ params[i++] = partName;
+ }
+ params[i++] = tblName;
+ params[i++] = dbName;
+ params[i] = catName;
return getPartitionsByQuery(catName, dbName, tblName, queryText, params,
isAcidTable, args);
}
diff --git
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
index afede2f768c..bbbf5f4bb32 100644
---
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
+++
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
@@ -149,6 +149,9 @@ public class TestObjectStore {
private static final String USER1 = "testobjectstoreuser1";
private static final String ROLE1 = "testobjectstorerole1";
private static final String ROLE2 = "testobjectstorerole2";
+ private static final String SQLI_PART_NAME = "test_part_col=missing') OR 1=1
-- ";
+ private static final List<String> ALL_PART_NAMES =
+ Arrays.asList("test_part_col=a0", "test_part_col=a1",
"test_part_col=a2");
private static final Logger LOG =
LoggerFactory.getLogger(TestObjectStore.class.getName());
private static final class LongSupplier implements Supplier<Long> {
@@ -802,6 +805,21 @@ public void testDirectSQLDropPartitionsCacheInSession()
Assert.assertEquals(1, partitions.size());
}
+ @Test
+ public void testDirectSQLDropPartitionsRejectsSqlInjectionInPartName()
+ throws Exception {
+ createPartitionedTable(false, false, new HashSet<>());
+
+ objectStore.dropPartitionsInternal(DEFAULT_CATALOG_NAME, DB1, TABLE1,
+ Collections.singletonList(SQLI_PART_NAME), true, false);
+
+ List<Partition> partitions;
+ try (AutoCloseable c = deadline()) {
+ partitions = objectStore.getPartitionsByNames(DEFAULT_CATALOG_NAME, DB1,
TABLE1, ALL_PART_NAMES);
+ }
+ Assert.assertEquals(3, partitions.size());
+ }
+
/**
* Checks if the JDO cache is able to handle directSQL partition drops cross
sessions.
*/
@@ -1024,6 +1042,63 @@ public void
testDeletePartitionColumnStatisticsWhenEngineHasSpecialCharacter() t
List.of("test_part_col=a2"), null, "special '");
}
+ @Test
+ public void testGetPartitionsByNamesRejectsSqlInjectionInPartName() throws
Exception {
+ createPartitionedTable(true, true, new HashSet<>());
+ List<Partition> partitions;
+ try (AutoCloseable c = deadline()) {
+ partitions = objectStore.getPartitionsByNames(DEFAULT_CATALOG_NAME, DB1,
TABLE1,
+ Collections.singletonList(SQLI_PART_NAME));
+ }
+ Assert.assertEquals(0, partitions.size());
+ try (AutoCloseable c = deadline()) {
+ partitions = objectStore.getPartitionsByNames(DEFAULT_CATALOG_NAME, DB1,
TABLE1, ALL_PART_NAMES);
+ }
+ Assert.assertEquals(3, partitions.size());
+ }
+
+ @Test
+ public void
testUpdatePartitionColumnStatisticsInBatchRejectsSqlInjectionInPartName()
+ throws Exception {
+ createPartitionedTable(true, true, new HashSet<>());
+ Table tbl = objectStore.getTable(DEFAULT_CATALOG_NAME, DB1, TABLE1);
+
+ List<List<ColumnStatistics>> baseline;
+ try (AutoCloseable c = deadline()) {
+ baseline =
objectStore.getPartitionColumnStatistics(DEFAULT_CATALOG_NAME, DB1, TABLE1,
+ ALL_PART_NAMES, Collections.singletonList("test_part_col"));
+ }
+ Assert.assertEquals(1, baseline.size());
+ Assert.assertEquals(3, baseline.get(0).size());
+ long baselineNumNulls =
baseline.get(0).get(0).getStatsObj().get(0).getStatsData()
+ .getLongStats().getNumNulls();
+
+ ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, DB1,
TABLE1);
+ statsDesc.setCatName(DEFAULT_CATALOG_NAME);
+ statsDesc.setPartName(SQLI_PART_NAME);
+ ColumnStatisticsData injectedData = new
ColStatsBuilder<>(long.class).numNulls(999).numDVs(2)
+ .low(3L).high(4L).build();
+ ColumnStatisticsObj statsObj = new ColumnStatisticsObj("test_part_col",
"int", injectedData);
+ ColumnStatistics maliciousStats = new ColumnStatistics(statsDesc,
+ Collections.singletonList(statsObj));
+ maliciousStats.setEngine(ENGINE);
+
+ Map<String, ColumnStatistics> statsMap = new HashMap<>();
+ statsMap.put(SQLI_PART_NAME, maliciousStats);
+ objectStore.updatePartitionColumnStatisticsInBatch(statsMap, tbl, null,
null, -1);
+
+ List<List<ColumnStatistics>> after;
+ try (AutoCloseable c = deadline()) {
+ after = objectStore.getPartitionColumnStatistics(DEFAULT_CATALOG_NAME,
DB1, TABLE1,
+ ALL_PART_NAMES, Collections.singletonList("test_part_col"));
+ }
+ Assert.assertEquals(3, after.get(0).size());
+ for (ColumnStatistics cs : after.get(0)) {
+ Assert.assertEquals(baselineNumNulls,
+ cs.getStatsObj().get(0).getStatsData().getLongStats().getNumNulls());
+ }
+ }
+
private void setAggrConf(boolean enableBitVector, boolean enableKll, int
batchSize) {
Configuration conf2 = MetastoreConf.newMetastoreConf(conf);
MetastoreConf.setBoolVar(conf2, ConfVars.STATS_FETCH_BITVECTOR,
enableBitVector);