pvary commented on a change in pull request #2826:
URL: https://github.com/apache/hive/pull/2826#discussion_r758973881



##########
File path: 
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -5275,38 +5275,67 @@ private void 
removeUnusedColumnDescriptor(MColumnDescriptor oldCD) {
       return;
     }
 
-    boolean success = false;
     Query query = null;
+    Query query2 = null;
+    boolean success = false;
+    LOG.debug("execute removeUnusedColumnDescriptor");
+    DatabaseProduct dbProduct = 
DatabaseProduct.determineDatabaseProduct(MetaStoreDirectSql.getProductName(pm), 
conf);
 
+    /**
+     * In order to workaround oracle not supporting limit statement caused 
performance issue, HIVE-9447 makes
+     * all the backend DB run select count(1) from SDS where SDS.CD_ID=? to 
check if the specific CD_ID is
+     * referenced in SDS table before drop a partition. This select count(1) 
statement does not scale well in
+     * Postgres, and there is no index for CD_ID column in SDS table.
+     * For a SDS table with with 1.5 million rows, select count(1) has average 
700ms without index, while in
+     * 10-20ms with index. But the statement before
+     * HIVE-9447( SELECT * FROM "SDS" "A0" WHERE "A0"."CD_ID" = $1 limit 1) 
uses less than 10ms .
+     */
     try {
       openTransaction();
-      LOG.debug("execute removeUnusedColumnDescriptor");
-
-      query = pm.newQuery("select count(1) from " +
-        "org.apache.hadoop.hive.metastore.model.MStorageDescriptor where 
(this.cd == inCD)");
-      query.declareParameters("MColumnDescriptor inCD");
-      long count = ((Long)query.execute(oldCD)).longValue();
-
-      //if no other SD references this CD, we can throw it out.
-      if (count == 0) {
-        // First remove any constraints that may be associated with this CD
-        query = pm.newQuery(MConstraint.class, "parentColumn == inCD || 
childColumn == inCD");
+      // Fix performance regression for postgres caused by HIVE-9447
+      if (dbProduct.isPOSTGRES() || dbProduct.isMYSQL()) {
+        query = pm.newQuery(MStorageDescriptor.class, "this.cd == inCD");
+        query.declareParameters("MColumnDescriptor inCD");
+        List<MStorageDescriptor> referencedSDs = 
listStorageDescriptorsWithCD(oldCD, query);
+        //if no other SD references this CD, we can throw it out.
+        if (referencedSDs != null && referencedSDs.isEmpty()) {
+          query2 = removeConstraintsAndCd(oldCD);
+        }
+      } else {
+        query = pm.newQuery(
+            "select count(1) from 
org.apache.hadoop.hive.metastore.model.MStorageDescriptor where (this.cd == 
inCD)");
         query.declareParameters("MColumnDescriptor inCD");
-        List<MConstraint> mConstraintsList = (List<MConstraint>) 
query.execute(oldCD);
-        if (CollectionUtils.isNotEmpty(mConstraintsList)) {
-          pm.deletePersistentAll(mConstraintsList);
+        long count = (Long) query.execute(oldCD);
+        //if no other SD references this CD, we can throw it out.
+        if (count == 0) {
+          query2 = removeConstraintsAndCd(oldCD);
         }
-        // Finally remove CD
-        pm.retrieve(oldCD);
-        pm.deletePersistent(oldCD);
       }
       success = commitTransaction();
-      LOG.debug("successfully deleted a CD in removeUnusedColumnDescriptor");
     } finally {
       rollbackAndCleanup(success, query);
+      if (query2 != null) {
+        query2.closeAll();
+      }
     }
   }
 
+  private Query removeConstraintsAndCd(MColumnDescriptor oldCD) {
+    Query query = null;

Review comment:
       I would close the query inside this method rather than leak it to the 
caller. Why do we return it instead? I might be missing something.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to