This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new aa743872256 HIVE-25189: Addendum: Cache validWriteIdList after SemanticAnalyzer#genResolvedParseTree (#6153)
aa743872256 is described below

commit aa743872256cc4dc941c3f8992960d17ca44c966
Author: Denys Kuzmenko <[email protected]>
AuthorDate: Thu Nov 6 00:00:44 2025 +0100

    HIVE-25189: Addendum: Cache validWriteIdList after SemanticAnalyzer#genResolvedParseTree (#6153)
---
 .../apache/hadoop/hive/ql/DriverTxnHandler.java    |   2 +-
 .../hadoop/hive/ql/parse/BaseSemanticAnalyzer.java |   2 +-
 .../hadoop/hive/ql/parse/CacheTableHelper.java     | 201 ---------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |   5 +-
 .../apache/hadoop/hive/ql/parse/HiveTableName.java |   4 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  69 +++----
 .../apache/hadoop/hive/ql/parse/TableHelper.java   |  49 +++++
 7 files changed, 90 insertions(+), 242 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
index 5f10cd129ea..df83412b7be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
@@ -246,7 +246,7 @@ private void acquireLocks() throws CommandProcessorException {
       String errorMessage;
      if (driverState.isDestroyed() || driverState.isAborted() || driverState.isClosed()) {
        errorMessage = String.format("Ignore lock acquisition related exception in terminal state (%s): %s",
-            driverState.toString(), e.getMessage());
+            driverState, e.getMessage());
         CONSOLE.printInfo(errorMessage);
       } else {
         errorMessage = String.format("FAILED: Error in acquiring locks: %s", 
e.getMessage());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index f3415219d26..26811dbebb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -2091,7 +2091,7 @@ public void startAnalysis() {
   public void endAnalysis(ASTNode tree) {
     if (ctx != null){
       queryProperties.setUsedTables(
-          CacheTableHelper.getUniqueNames(ctx.getParsedTables()));
+          TableHelper.getUniqueNames(ctx.getParsedTables()));
     }
     setQueryType(tree); // at this point we know the query type for sure
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CacheTableHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CacheTableHelper.java
deleted file mode 100644
index 58b2011ec23..00000000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CacheTableHelper.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.parse;
-
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
-
-import com.github.benmanes.caffeine.cache.Cache;
-import com.github.benmanes.caffeine.cache.Caffeine;
-
-import com.google.common.base.Preconditions;
-import org.apache.commons.lang3.tuple.Pair;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.hadoop.hive.common.ValidTxnList;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
-import org.apache.hadoop.hive.ql.session.SessionState;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Class to help populate the cache at the beginning of query analysis. We would like
- * to minimize the number of calls to fetch validWriteIdLists from the metastore. HMS
- * has an API to request this object for multiple tables within one call, and this class
- * uses that API.
- *
- * The sole purpose of this class is to help populate the HMS query cache. Nothing is returned
- * from the public methods. In this way, if another method attempts to fetch a validWriteIdList,
- * the SessionHiveMetaStoreClient query cache will contain the information.
- *
- * Because this class is only responsible for cache population, it is not a requirement for
- * a caller to supply all the tables necessary for the query. It is also not a requirement
- * for the tables to be part of the query. Of course, the query qill benefit if those
- * conditions were true, but if the table is not in the cache, a later call fetching the writeids
- * will hit the HMS server and will not fail.
- *
- * One tricky aspect to this class is that if a view is passed in, we want to fetch the
- * validWriteIdLists for the underlying tables. At the beginning of the query, it is impossible
- * to know the underlying tables without contacting HMS.
- *
- * In order to handle the underlying tables to the views, we keep a cache that holds our
- * best guess. If we see a view in any query, we set up a server-wide cache that tracks
- * the underlying tables to the view. If the view doesn't change, then this information
- * will be accurate and allow us to fetch the underlying tables on our next query. If the
- * view does change and the underlying tables are different, our fetch won't retrieve the
- * correct information. But that's ok...remember what was said earlier that it is not
- * a requirement for the tables to be part of the query. Later on in the query, this class
- * will be called on the view level via the populateCacheForView call. At that point, if
- * something changed, it will populate the cache with the newly detected tables. It will also
- * change the underying table information for the view to optimize the next query using
- * the view.
- */
-public class CacheTableHelper {
-  protected static final Logger LOG = LoggerFactory.getLogger(CacheTableHelper.class);
-
-  // Server wide cache used to hold what we currently think are the underlying tables
-  // for a view (which is the key). This information can go stale, but that's ok. The only
-  // repercussion of a stale view is that we will have to make an additional HMS call
-  // to retrieve the validWriteIdList for the changed tables.
-  // Will hold 10000 objects, can't imagine that being more than a couple of M, tops.
-  private static final Cache<String, Set<String>> underlyingTableHints = Caffeine.newBuilder()
-      .maximumSize(10000)
-      .build();
-
-  /**
-   * Populates the cache for the given table pairs. The tables passed in are a Pair
-   * containing the dbname (null if not given) and the table name.
-   */
-  public void populateCache(List<Pair<String, String>> tables, HiveConf conf,
-      HiveTxnManager txnMgr) {
-    // if there is no transaction, then we don't need to fetch the validWriteIds.
-    if (txnMgr == null || !txnMgr.isTxnOpen()) {
-      return;
-    }
-
-    List<String> fullTableNamesList = new ArrayList<>(getAllUniqueViewsAndTables(tables));
-    LOG.debug("Populating query cache");
-    for (String s : fullTableNamesList) {
-      LOG.debug("Populating table " + s);
-    }
-    String validTxnList = conf.get(ValidTxnList.VALID_TXNS_KEY);
-    try {
-      txnMgr.getValidWriteIds(fullTableNamesList, validTxnList);
-    } catch (Exception e) {
-      LOG.info("Population of valid write id list cache failed, will be done 
later in query.");
-    }
-  }
-
-  /**
-   * Populates the cache for the given table pairs associated with a viewName.
-   * If the table names provided match the table names that we think are associated
-   * with the view, we just return, because we presume that they have already been
-   * popuated via the "populateCache" method. If they are different, we populate the cache
-   * with the new associated tables and change our associated tables for the view.
-   */
-  public void populateCacheForView(List<Pair<String, String>> tables, HiveConf conf,
-      HiveTxnManager txnMgr, String dbName, String viewName) {
-    if (!conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_VIEW_CACHE_ENABLED)) {
-      return;
-    }
-
-    String completeViewName = dbName + "." + viewName;
-    LOG.debug("Found view while parsing: " + completeViewName);
-    Set<String> underlyingTablesAndViews = getUniqueNames(tables);
-    // If the tables passed in match our cache, assume the cache has already
-    // been populated.
-    if (underlyingTablesAndViews.equals(underlyingTableHints.getIfPresent(completeViewName))) {
-      LOG.debug("View already cached.");
-      return;
-    }
-    // populate the metastore cache for this view.
-    populateCache(tables, conf, txnMgr);
-    // cache the names of the tables for the given view in our server-wide cache.
-    underlyingTableHints.put(completeViewName, underlyingTablesAndViews);
-  }
-
-  /**
-   * Return all the unique views and tables given a list of tables. We iterate
-   * through all the tables passed in. If it is a table in the list, we add the
-   * table to our unique list. If it is a view, we add what we think are the
-   * tables and views used by the view and iterate through those.
-   *
-   * In order to generate a unique list, we need to add the dbname (from the SessionState)
-   * if the db was not provided in the query from the user.
-   */
-  private Set<String> getAllUniqueViewsAndTables(List<Pair<String, String>> tables) {
-    Set<String> fullTableNames = new HashSet<>();
-    Queue<Pair<String, String>> queue = new LinkedList<>(tables);
-    LOG.debug("Getting all tables.");
-    while (queue.peek() != null) {
-      String tableOrView = getTableName(queue.remove());
-      LOG.debug("Getting table " + tableOrView);
-      Set<String> underlyingTables = underlyingTableHints.getIfPresent(tableOrView);
-      if (underlyingTables != null) {
-        // don't process the same table twice.
-        if (fullTableNames.contains(tableOrView)) {
-          continue;
-        }
-        // it's a view.
-        LOG.debug("View in cache, adding its tables to queue.");
-        for (String viewTable : underlyingTables) {
-          LOG.debug("View table is " + viewTable);
-          String[] dbTableArray = viewTable.split("\\.");
-          Preconditions.checkNotNull(dbTableArray[0]);
-          Preconditions.checkNotNull(dbTableArray[1]);
-          queue.offer(new ImmutablePair<String, String>(dbTableArray[0], dbTableArray[1]));
-        }
-      }
-      // We'll fetch validWriteIdList information whether it's a view or a table.
-      fullTableNames.add(tableOrView);
-    }
-    return fullTableNames;
-  }
-
-  /**
-   * Get the unique names from a table list. The list may contain some cases where
-   * both the dbname and tablename are provided and some cases where the dbname is
-   * null, in which case we need to grab the dbname from the SessionState.
-   */
-  public static Set<String> getUniqueNames(List<Pair<String, String>> tables) {
-    Set<String> names = new HashSet<>();
-    for (Pair<String, String> table : tables) {
-      names.add(getTableName(table));
-    }
-    return names;
-  }
-
-  /**
-   * Get the table name from a dbname/tablename pair. If the dbname is
-   * null, use the SessionState to provide the dbname.
-   */
-  private static String getTableName(Pair<String, String> dbTablePair) {
-    String dbName = dbTablePair.getLeft() == null
-        ? SessionState.get().getCurrentDatabase() : dbTablePair.getLeft();
-    return dbName + "." + dbTablePair.getRight();
-  }
-}
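
The Javadoc removed above describes the old approach: prime the HMS query cache for validWriteIdLists up front, with one metastore call covering all parsed tables and any cached view dependencies. With this patch that priming moves into SemanticAnalyzer#openTxnAndSetValidTxnList, after genResolvedParseTree (see the SemanticAnalyzer.java hunks below). For readers of this notification, a minimal sketch of the priming step itself, using only calls that appear in the removed code; the class name and table list are hypothetical and not part of the patch:

    // Sketch only, not part of the patch: fetch write ids for several tables in
    // one HMS round trip so that later per-table lookups hit the
    // SessionHiveMetaStoreClient query cache. Mirrors the removed populateCache.
    import java.util.List;
    import org.apache.hadoop.hive.common.ValidTxnList;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;

    class WriteIdCachePrimingSketch {
      static void prime(HiveConf conf, HiveTxnManager txnMgr, List<String> fullTableNames) {
        if (txnMgr == null || !txnMgr.isTxnOpen()) {
          return; // no open transaction, nothing to prime
        }
        String validTxnList = conf.get(ValidTxnList.VALID_TXNS_KEY);
        try {
          txnMgr.getValidWriteIds(fullTableNames, validTxnList);
        } catch (Exception e) {
          // best effort: a cache miss is simply repaired by a later HMS call
        }
      }
    }
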
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 5150de01baa..19c780250eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2458,8 +2458,7 @@ protected Set<TableName> getTablesUsed(RelNode plan) {
       new RelVisitor() {
         @Override
         public void visit(RelNode node, int ordinal, RelNode parent) {
-          if (node instanceof TableScan) {
-            TableScan ts = (TableScan) node;
+          if (node instanceof TableScan ts) {
             Table table = ((RelOptHiveTable) ts.getTable()).getHiveTableMD();
             if (AcidUtils.isTransactionalTable(table) ||
                  table.isNonNative() && table.getStorageHandler().areSnapshotsSupported()) {
@@ -5112,7 +5111,7 @@ private ImmutableMap<String, Integer> buildHiveColNameToInputPosMap(
       return hiveColNameToInputPosMapBuilder.build();
     }
 
-    private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
+    private QBParseInfo getQBParseInfo(QB qb) {
       return qb.getParseInfo();
     }
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveTableName.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveTableName.java
index d0a723669ff..20965b0b2ad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveTableName.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveTableName.java
@@ -45,14 +45,12 @@ public static TableName of(Table table) throws SemanticException {
   * Set a @{@link Table} object's table and db names based on the provided string.
    * @param dbTable the dbtable string
    * @param table the table to update
-   * @return the table
    * @throws SemanticException
    */
-  public static Table setFrom(String dbTable, Table table) throws SemanticException{
+  public static void setFrom(String dbTable, Table table) throws SemanticException{
     TableName name = ofNullable(dbTable);
     table.setTableName(name.getTable());
     table.setDbName(name.getDb());
-    return table;
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1a996268b69..c10e55b2d37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -87,7 +87,6 @@
 import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
 import org.apache.hadoop.hive.common.StringInternUtils;
 import org.apache.hadoop.hive.common.TableName;
-import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
 import org.apache.hadoop.hive.conf.Constants;
@@ -363,7 +362,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
   private Map<SelectOperator, Table> viewProjectToTableSchema;
   private Operator<? extends OperatorDesc> sinkOp;
-  private final CacheTableHelper cacheTableHelper = new CacheTableHelper();
 
   /**
    * a map for the split sampling, from alias to an instance of SplitSample
@@ -2883,8 +2881,7 @@ private void replaceViewReferenceWithDefinition(QB qb, Table tab,
       String viewText = tab.getViewExpandedText();
       TableMask viewMask = new TableMask(this, conf, false);
       viewTree = ParseUtils.parse(viewText, ctx, tab.getCompleteName());
-      cacheTableHelper.populateCacheForView(ctx.getParsedTables(), conf,
-          getTxnMgr(), tab.getDbName(), tab.getTableName());
+
       if (viewMask.isEnabled() && analyzeRewrite == null) {
         ParseResult parseResult = rewriteASTWithMaskAndFilter(viewMask, viewTree,
             ctx.getViewTokenRewriteStream(viewFullyQualifiedName),
@@ -13163,16 +13160,13 @@ void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticE
     perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.GENERATE_RESOLVED_PARSETREE);
     // 1. Generate Resolved Parse tree from syntax tree
     boolean needsTransform = needsTransform();
-    //change the location of position alias process here
+    // change the location of position alias process here
     processPositionAlias(ast);
-    cacheTableHelper.populateCache(ctx.getParsedTables(), conf, getTxnMgr());
     PlannerContext plannerCtx = pcf.get();
     if (!genResolvedParseTree(ast, plannerCtx)) {
       return;
     }
-    if (tablesFromReadEntities(inputs).stream().anyMatch(AcidUtils::isTransactionalTable)) {
-      queryState.getValidTxnList();
-    }
+    openTxnAndSetValidTxnList();
 
     if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
       for (String alias : qb.getSubqAliases()) {
@@ -15164,39 +15158,48 @@ private String getQueryStringForCache(ASTNode ast) {
             .toString(RESULTS_CACHE_KEY_TOKEN_REWRITE_PROGRAM, ast.getTokenStartIndex(), ast.getTokenStopIndex());
   }
 
-  private ValidTxnWriteIdList getQueryValidTxnWriteIdList() throws SemanticException {
+  private void openTxnAndSetValidTxnList() throws SemanticException {
+    if (tablesFromReadEntities(inputs).stream().noneMatch(AcidUtils::isTransactionalTable)
+        || SessionState.get().isCompaction()) {
+      return;
+    }
+    if (queryState.getHMSCache() != null) {
+      // this step primes the cache containing the validTxnWriteIdList. It will fetch
+      // all the tables into the MetaStore Client cache with one HMS call.
+      getValidTxnWriteIdList();
+    } else {
+      queryState.getValidTxnList();
+    }
+  }
+
+  private ValidTxnWriteIdList getValidTxnWriteIdList() throws SemanticException {
     // TODO: Once HIVE-18948 is in, should be able to retrieve writeIdList from the conf.
-    //cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf);
+    // cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf);
     //
-    List<String> transactionalTables = tablesFromReadEntities(inputs)
-      .stream()
-      .filter(AcidUtils::isTransactionalTable)
-      .map(Table::getFullyQualifiedName)
-      .collect(Collectors.toList());
-    
-    if (transactionalTables.size() > 0) {
-      String txnString = queryState.getValidTxnList();
-      if (txnString == null) {
-        return null;
-      }
-      try {
-        return getTxnMgr().getValidWriteIds(transactionalTables, txnString);
-      } catch (Exception err) {
-        String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables
-                + " and validTxnList " + conf.get(ValidTxnList.VALID_TXNS_KEY);
-        throw new SemanticException(msg, err);
-      }
-    }
+    var transactionalTables = tablesFromReadEntities(inputs)
+        .stream()
+        .filter(AcidUtils::isTransactionalTable)
+        .map(Table::getFullyQualifiedName)
+        .toList();
 
-    // No transactional tables.
-    return null;
+    if (transactionalTables.isEmpty()) {
+      return null;
+    }
+    String txnString = queryState.getValidTxnList();
+    try {
+      return getTxnMgr().getValidWriteIds(transactionalTables, txnString);
+    } catch (Exception err) {
+      String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables
+          + " and validTxnList " + txnString;
+      throw new SemanticException(msg, err);
+    }
   }
 
   private QueryResultsCache.LookupInfo createLookupInfoForQuery(ASTNode astNode) throws SemanticException {
     QueryResultsCache.LookupInfo lookupInfo = null;
     String queryString = getQueryStringForCache(astNode);
     if (queryString != null) {
-      ValidTxnWriteIdList writeIdList = getQueryValidTxnWriteIdList();
+      ValidTxnWriteIdList writeIdList = getValidTxnWriteIdList();
       Set<Long> involvedTables = tablesFromReadEntities(inputs).stream()
           .map(Table::getTTable)
           .map(org.apache.hadoop.hive.metastore.api.Table::getId)
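
Read together, the SemanticAnalyzer.java hunks above defer write-id caching until the resolved parse tree, and therefore the set of read entities, is available; that is what the commit title refers to. A condensed, non-verbatim sketch of the resulting order of operations in analyzeInternal, using only names that appear in this diff:

    // Condensed from the added and retained lines above; not a verbatim excerpt.
    processPositionAlias(ast);
    PlannerContext plannerCtx = pcf.get();
    if (!genResolvedParseTree(ast, plannerCtx)) {
      return;
    }
    // Single decision point replacing the old CacheTableHelper priming: skip for
    // non-transactional queries and compaction, otherwise either prime the
    // MetaStore client cache (one HMS call) or just record the valid txn list.
    openTxnAndSetValidTxnList();
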
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TableHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableHelper.java
new file mode 100644
index 00000000000..9ccc2b2c198
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableHelper.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+public class TableHelper {
+  /**
+   * Get the unique names from a table list. The list may contain some cases where
+   * both the dbname and tablename are provided and some cases where the dbname is
+   * null, in which case we need to grab the dbname from the SessionState.
+   */
+  public static Set<String> getUniqueNames(List<Pair<String, String>> tables) {
+    return tables.stream().map(TableHelper::getTableName)
+        .collect(Collectors.toSet());
+  }
+
+  /**
+   * Get the table name from a dbname/tablename pair. If the dbname is
+   * null, use the SessionState to provide the dbname.
+   */
+  private static String getTableName(Pair<String, String> dbTablePair) {
+    String dbName = dbTablePair.getLeft() == null ?
+        SessionState.get().getCurrentDatabase() : dbTablePair.getLeft();
+    return TableName.getDbTable(dbName, dbTablePair.getRight());
+  }
+}
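
For completeness, a small usage sketch of the new helper, mirroring the call site in BaseSemanticAnalyzer#endAnalysis shown earlier in this patch. The table pairs are made up for illustration and an active SessionState is assumed; a null db name falls back to the session's current database:

    // Sketch only, not part of the patch: qualify parsed (db, table) pairs the
    // way TableHelper.getUniqueNames does. The pairs below are illustrative.
    import java.util.Arrays;
    import java.util.List;
    import java.util.Set;
    import org.apache.commons.lang3.tuple.ImmutablePair;
    import org.apache.commons.lang3.tuple.Pair;
    import org.apache.hadoop.hive.ql.parse.TableHelper;

    class TableHelperUsageSketch {
      static Set<String> uniqueNames() {
        List<Pair<String, String>> parsedTables = Arrays.asList(
            new ImmutablePair<String, String>("sales", "orders"),   // db given explicitly
            new ImmutablePair<String, String>(null, "customers"));  // db resolved from the session
        // e.g. {"sales.orders", "default.customers"} when the current database is "default"
        return TableHelper.getUniqueNames(parsedTables);
      }
    }
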
