This is an automated email from the ASF dual-hosted git repository.

zhangbutao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new c26c25df596 HIVE-28062: Optimize get_partitions_by_names in direct sql 
(#5063)(Wechar Yu, reviewed by Denys Kuzmenko, Butao Zhang)
c26c25df596 is described below

commit c26c25df5963108cd3c4921675e4b67a7f0401fd
Author: Wechar Yu <[email protected]>
AuthorDate: Wed Mar 27 14:11:50 2024 +0800

    HIVE-28062: Optimize get_partitions_by_names in direct sql (#5063)(Wechar 
Yu, reviewed by Denys Kuzmenko, Butao Zhang)
---
 .../ql/TestMetaStoreLimitPartitionRequest.java     |  3 +
 .../hadoop/hive/metastore/conf/MetastoreConf.java  |  1 +
 .../apache/hadoop/hive/metastore/HMSHandler.java   |  6 +-
 .../hadoop/hive/metastore/MetaStoreDirectSql.java  | 65 +++++++++++++++-------
 4 files changed, 52 insertions(+), 23 deletions(-)

diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMetaStoreLimitPartitionRequest.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMetaStoreLimitPartitionRequest.java
index 470b5ccdfd0..8ea7ff9e1b2 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMetaStoreLimitPartitionRequest.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMetaStoreLimitPartitionRequest.java
@@ -73,6 +73,9 @@ public class TestMetaStoreLimitPartitionRequest {
     conf.setBoolVar(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL, true);
     conf.setBoolVar(HiveConf.ConfVars.DYNAMIC_PARTITIONING, true);
     conf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, false);
+    // Disable loading dynamic partitions from partition names in this test
+    // because get_partitions_by_names will also hit partition limit since 
HIVE-28062.
+    
conf.setBoolVar(HiveConf.ConfVars.HIVE_LOAD_DYNAMIC_PARTITIONS_SCAN_SPECIFIC_PARTITIONS,
 false);
 
     miniHS2 = new MiniHS2.Builder().withConf(conf).build();
     Map<String, String> overlayProps = new HashMap<String, String>();
diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 5a0c8a4ba0f..b2713144bdb 100644
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -1049,6 +1049,7 @@ public class MetastoreConf {
         "This limits the number of partitions (whole partition objects) that 
can be requested " +
         "from the metastore for a give table. MetaStore API methods using this 
are: \n" +
                 "get_partitions, \n" +
+                "get_partitions_by_names, \n" +
                 "get_partitions_with_auth, \n" +
                 "get_partitions_by_filter, \n" +
                 "get_partitions_spec_by_filter, \n" +
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
index cd50a3480ee..f143ed6ee45 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
@@ -7466,7 +7466,7 @@ public class HMSHandler extends FacebookBase implements 
IHMSHandler {
     return result;
   }
 
-  public List<Partition> get_partitions_by_names(final String dbName, final 
String tblName,
+  private List<Partition> get_partitions_by_names(final String dbName, final 
String tblName,
       boolean getColStats, String engine,
       List<String> processorCapabilities, String processorId,
       GetPartitionsArgs args) throws TException {
@@ -7478,14 +7478,14 @@ public class HMSHandler extends FacebookBase implements 
IHMSHandler {
     Table table = null;
     Exception ex = null;
     boolean success = false;
-    startTableFunction("get_partitions_by_names", parsedCatName, parsedDbName,
-        tblName);
+    startTableFunction("get_partitions_by_names", parsedCatName, parsedDbName, 
tblName);
     try {
       getMS().openTransaction();
       authorizeTableForPartitionMetadata(parsedCatName, parsedDbName, tblName);
 
       fireReadTablePreEvent(parsedCatName, parsedDbName, tblName);
 
+      checkLimitNumberOfPartitions(tblName, args.getPartNames().size(), -1);
       ret = getMS().getPartitionsByNames(parsedCatName, parsedDbName, tblName, 
args);
       ret = FilterUtils.filterPartitionsIfEnabled(isServerFilterEnabled, 
filterHook, ret);
       table = getTable(parsedCatName, parsedDbName, tblName);
diff --git 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index c453df0ea1b..4a1f2ca8db2 100644
--- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -739,14 +739,7 @@ class MetaStoreDirectSql {
     return Batchable.runBatched(batchSize, partNames, new Batchable<String, 
Partition>() {
       @Override
       public List<Partition> run(List<String> input) throws MetaException {
-        String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + 
makeParams(input.size()) + ")";
-        List<Long> partitionIds = getPartitionIdsViaSqlFilter(catName, dbName, 
tblName,
-            filter, input, Collections.<String>emptyList(), null);
-        if (partitionIds.isEmpty()) {
-          return Collections.emptyList(); // no partitions, bail early.
-        }
-        return getPartitionsFromPartitionIds(catName, dbName, tblName, null,
-                partitionIds, Collections.emptyList(), false, args);
+        return getPartitionsByNames(catName, dbName, tblName, partNames, 
false, args);
       }
     });
   }
@@ -770,8 +763,7 @@ class MetaStoreDirectSql {
     return Batchable.runBatched(batchSize, partitionIds, new Batchable<Long, 
Partition>() {
       @Override
       public List<Partition> run(List<Long> input) throws MetaException {
-        return getPartitionsFromPartitionIds(catName, dbName,
-            tableName, null, input, Collections.emptyList(), isAcidTable, 
args);
+        return getPartitionsByPartitionIds(catName, dbName, tableName, input, 
isAcidTable, args);
       }
     });
   }
@@ -925,7 +917,7 @@ class MetaStoreDirectSql {
     List<Partition> result = Batchable.runBatched(batchSize, partitionIds, new 
Batchable<Long, Partition>() {
       @Override
       public List<Partition> run(List<Long> input) throws MetaException {
-        return getPartitionsFromPartitionIds(catName, dbName, tblName, null, 
input, Collections.emptyList(), false, args);
+        return getPartitionsByPartitionIds(catName, dbName, tblName, input, 
false, args);
       }
     });
     return result;
@@ -1024,15 +1016,39 @@ class MetaStoreDirectSql {
     }
   }
 
+
   /** Should be called with the list short enough to not trip up Oracle/etc. */
-  private List<Partition> getPartitionsFromPartitionIds(String catName, String 
dbName, String tblName,
-      Boolean isView, List<Long> partIdList, List<String> projectionFields,
-      boolean isAcidTable, GetPartitionsArgs args) throws MetaException {
+  private List<Partition> getPartitionsByNames(String catName, String dbName,
+      String tblName, List<String> partNameList, boolean isAcidTable, 
GetPartitionsArgs args)
+      throws MetaException {
+    // Get most of the fields for the partNames provided.
+    // Assume db and table names are the same for all partition, as provided 
in arguments.
+    String quotedPartNames = partNameList.stream()
+        .map(DirectSqlUpdatePart::quoteString)
+        .collect(Collectors.joining(","));
 
-    boolean doTrace = LOG.isDebugEnabled();
+    String queryText =
+        "select " + PARTITIONS + ".\"PART_ID\"," + SDS + ".\"SD_ID\"," + SDS + 
".\"CD_ID\","
+        + SERDES + ".\"SERDE_ID\"," + PARTITIONS + ".\"CREATE_TIME\"," + 
PARTITIONS
+        + ".\"LAST_ACCESS_TIME\"," + SDS + ".\"INPUT_FORMAT\"," + SDS + 
".\"IS_COMPRESSED\","
+        + SDS + ".\"IS_STOREDASSUBDIRECTORIES\"," + SDS + ".\"LOCATION\"," + 
SDS
+        + ".\"NUM_BUCKETS\"," + SDS + ".\"OUTPUT_FORMAT\"," + SERDES + 
".\"NAME\","
+        + SERDES + ".\"SLIB\"," + PARTITIONS + ".\"WRITE_ID\"" + " from " + 
PARTITIONS
+        + " left outer join " + SDS + " on " + PARTITIONS + ".\"SD_ID\" = " + 
SDS + ".\"SD_ID\" "
+        + " left outer join " + SERDES + " on " + SDS + ".\"SERDE_ID\" = " + 
SERDES + ".\"SERDE_ID\" "
+        + " inner join " + TBLS + " on " + TBLS + ".\"TBL_ID\" = " + 
PARTITIONS + ".\"TBL_ID\" "
+        + " inner join " + DBS + " on " + DBS + ".\"DB_ID\" = " + TBLS + 
".\"DB_ID\" "
+        + " where \"PART_NAME\" in (" + quotedPartNames + ") "
+        + " and " + TBLS + ".\"TBL_NAME\" = ? and " + DBS + ".\"NAME\" = ? and 
" + DBS
+        + ".\"CTLG_NAME\" = ? order by \"PART_NAME\" asc";
+
+    Object[] params = new Object[]{tblName, dbName, catName};
+    return getPartitionsByQuery(catName, dbName, tblName, queryText, params, 
isAcidTable, args);
+  }
 
-    int idStringWidth = (int)Math.ceil(Math.log10(partIdList.size())) + 1; // 
1 for comma
-    int sbCapacity = partIdList.size() * idStringWidth;
+  /** Should be called with the list short enough to not trip up Oracle/etc. */
+  private List<Partition> getPartitionsByPartitionIds(String catName, String 
dbName, String tblName,
+      List<Long> partIdList, boolean isAcidTable, GetPartitionsArgs args) 
throws MetaException {
     // Get most of the fields for the IDs provided.
     // Assume db and table names are the same for all partition, as provided 
in arguments.
     String partIds = getIdListForIn(partIdList);
@@ -1047,6 +1063,13 @@ class MetaStoreDirectSql {
             + ".\"SD_ID\" " + "  left outer join " + SERDES + " on " + SDS + 
".\"SERDE_ID\" = "
             + SERDES + ".\"SERDE_ID\" " + "where \"PART_ID\" in (" + partIds
             + ") order by \"PART_NAME\" asc";
+    return getPartitionsByQuery(catName, dbName, tblName, queryText, null, 
isAcidTable, args);
+  }
+
+  private List<Partition> getPartitionsByQuery(String catName, String dbName, 
String tblName,
+      String queryText, Object[] params, boolean isAcidTable, 
GetPartitionsArgs args)
+      throws MetaException {
+    boolean doTrace = LOG.isDebugEnabled();
 
     // Read all the fields and create partitions, SDs and serdes.
     TreeMap<Long, Partition> partitions = new TreeMap<Long, Partition>();
@@ -1054,10 +1077,10 @@ class MetaStoreDirectSql {
     TreeMap<Long, SerDeInfo> serdes = new TreeMap<Long, SerDeInfo>();
     TreeMap<Long, List<FieldSchema>> colss = new TreeMap<Long, 
List<FieldSchema>>();
     // Keep order by name, consistent with JDO.
-    ArrayList<Partition> orderedResult = new 
ArrayList<Partition>(partIdList.size());
+    ArrayList<Partition> orderedResult;
 
     // Prepare StringBuilder-s for "in (...)" lists to use in one-to-many 
queries.
-    StringBuilder sdSb = new StringBuilder(sbCapacity), serdeSb = new 
StringBuilder(sbCapacity);
+    StringBuilder sdSb = new StringBuilder(), serdeSb = new StringBuilder();
     StringBuilder colsSb = new StringBuilder(7); // We expect that there's 
only one field schema.
     tblName = tblName.toLowerCase();
     dbName = dbName.toLowerCase();
@@ -1066,9 +1089,10 @@ class MetaStoreDirectSql {
 
     try (QueryWrapper query = new 
QueryWrapper(pm.newQuery("javax.jdo.query.SQL", queryText))) {
       long start = doTrace ? System.nanoTime() : 0;
-      List<Object[]> sqlResult = executeWithArray(query.getInnerQuery(), null, 
queryText);
+      List<Object[]> sqlResult = executeWithArray(query.getInnerQuery(), 
params, queryText);
       long queryTime = doTrace ? System.nanoTime() : 0;
       Deadline.checkTimeout();
+      orderedResult = new ArrayList<>(sqlResult.size());
 
       for (Object[] fields : sqlResult) {
         // Here comes the ugly part...
@@ -1161,6 +1185,7 @@ class MetaStoreDirectSql {
       }
       MetastoreDirectSqlUtils.timingTrace(doTrace, queryText, start, 
queryTime);
     }
+    String partIds = getIdListForIn(partitions.keySet());
     // Now get all the one-to-many things. Start with partitions.
     MetastoreDirectSqlUtils
         .setPartitionParametersWithFilter(PARTITION_PARAMS, 
convertMapNullsToEmptyStrings, pm,

Reply via email to