This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new aa743872256 HIVE-25189: Addendum: Cache validWriteIdList after SemanticAnalyzer#genResolvedParseTree (#6153)
aa743872256 is described below
commit aa743872256cc4dc941c3f8992960d17ca44c966
Author: Denys Kuzmenko <[email protected]>
AuthorDate: Thu Nov 6 00:00:44 2025 +0100
HIVE-25189: Addendum: Cache validWriteIdList after SemanticAnalyzer#genResolvedParseTree (#6153)
---
.../apache/hadoop/hive/ql/DriverTxnHandler.java | 2 +-
.../hadoop/hive/ql/parse/BaseSemanticAnalyzer.java | 2 +-
.../hadoop/hive/ql/parse/CacheTableHelper.java | 201 ---------------------
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
.../apache/hadoop/hive/ql/parse/HiveTableName.java | 4 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 69 +++----
.../apache/hadoop/hive/ql/parse/TableHelper.java | 49 +++++
7 files changed, 90 insertions(+), 242 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
index 5f10cd129ea..df83412b7be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverTxnHandler.java
@@ -246,7 +246,7 @@ private void acquireLocks() throws CommandProcessorException {
String errorMessage;
if (driverState.isDestroyed() || driverState.isAborted() || driverState.isClosed()) {
errorMessage = String.format("Ignore lock acquisition related
exception in terminal state (%s): %s",
- driverState.toString(), e.getMessage());
+ driverState, e.getMessage());
CONSOLE.printInfo(errorMessage);
} else {
errorMessage = String.format("FAILED: Error in acquiring locks: %s",
e.getMessage());
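
The hunk above drops a redundant explicit toString(): the %s conversion in String.format already applies String.valueOf to its argument, which also makes the call null-safe. A minimal, self-contained sketch of the difference (the DriverState enum below is a stand-in for illustration, not Hive's real class):

    public class FormatDemo {
        // Hypothetical stand-in for Hive's driver state; illustrative only.
        enum DriverState { DESTROYED, ABORTED, CLOSED }

        public static void main(String[] args) {
            DriverState state = DriverState.CLOSED;
            // %s invokes String.valueOf(arg), which calls toString() on non-null args.
            System.out.println(String.format("terminal state (%s)", state));  // terminal state (CLOSED)

            state = null;
            // Still safe: prints "terminal state (null)" where state.toString() would throw NPE.
            System.out.println(String.format("terminal state (%s)", state));
        }
    }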
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index f3415219d26..26811dbebb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -2091,7 +2091,7 @@ public void startAnalysis() {
public void endAnalysis(ASTNode tree) {
if (ctx != null){
queryProperties.setUsedTables(
- CacheTableHelper.getUniqueNames(ctx.getParsedTables()));
+ TableHelper.getUniqueNames(ctx.getParsedTables()));
}
setQueryType(tree); // at this point we know the query type for sure
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CacheTableHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CacheTableHelper.java
deleted file mode 100644
index 58b2011ec23..00000000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CacheTableHelper.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.parse;
-
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
-
-import com.github.benmanes.caffeine.cache.Cache;
-import com.github.benmanes.caffeine.cache.Caffeine;
-
-import com.google.common.base.Preconditions;
-import org.apache.commons.lang3.tuple.Pair;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.hadoop.hive.common.ValidTxnList;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
-import org.apache.hadoop.hive.ql.session.SessionState;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Class to help populate the cache at the beginning of query analysis. We would like
- * to minimize the number of calls to fetch validWriteIdLists from the metastore. HMS
- * has an API to request this object for multiple tables within one call, and this class
- * uses that API.
- *
- * The sole purpose of this class is to help populate the HMS query cache. Nothing is returned
- * from the public methods. In this way, if another method attempts to fetch a validWriteIdList,
- * the SessionHiveMetaStoreClient query cache will contain the information.
- *
- * Because this class is only responsible for cache population, it is not a requirement for
- * a caller to supply all the tables necessary for the query. It is also not a requirement
- * for the tables to be part of the query. Of course, the query will benefit if those
- * conditions were true, but if the table is not in the cache, a later call fetching the writeids
- * will hit the HMS server and will not fail.
- *
- * One tricky aspect to this class is that if a view is passed in, we want to fetch the
- * validWriteIdLists for the underlying tables. At the beginning of the query, it is impossible
- * to know the underlying tables without contacting HMS.
- *
- * In order to handle the underlying tables to the views, we keep a cache that holds our
- * best guess. If we see a view in any query, we set up a server-wide cache that tracks
- * the underlying tables to the view. If the view doesn't change, then this information
- * will be accurate and allow us to fetch the underlying tables on our next query. If the
- * view does change and the underlying tables are different, our fetch won't retrieve the
- * correct information. But that's ok...remember what was said earlier that it is not
- * a requirement for the tables to be part of the query. Later on in the query, this class
- * will be called on the view level via the populateCacheForView call. At that point, if
- * something changed, it will populate the cache with the newly detected tables. It will also
- * change the underlying table information for the view to optimize the next query using
- * the view.
- */
-public class CacheTableHelper {
- protected static final Logger LOG = LoggerFactory.getLogger(CacheTableHelper.class);
-
- // Server wide cache used to hold what we currently think are the underlying tables
- // for a view (which is the key). This information can go stale, but that's ok. The only
- // repercussion of a stale view is that we will have to make an additional HMS call
- // to retrieve the validWriteIdList for the changed tables.
- // Will hold 10000 objects, can't imagine that being more than a couple of M, tops.
- private static final Cache<String, Set<String>> underlyingTableHints = Caffeine.newBuilder()
- .maximumSize(10000)
- .build();
-
- /**
- * Populates the cache for the given table pairs. The tables passed in are a Pair
- * containing the dbname (null if not given) and the table name.
- */
- public void populateCache(List<Pair<String, String>> tables, HiveConf conf,
- HiveTxnManager txnMgr) {
- // if there is no transaction, then we don't need to fetch the validWriteIds.
- if (txnMgr == null || !txnMgr.isTxnOpen()) {
- return;
- }
-
- List<String> fullTableNamesList = new
ArrayList<>(getAllUniqueViewsAndTables(tables));
- LOG.debug("Populating query cache");
- for (String s : fullTableNamesList) {
- LOG.debug("Populating table " + s);
- }
- String validTxnList = conf.get(ValidTxnList.VALID_TXNS_KEY);
- try {
- txnMgr.getValidWriteIds(fullTableNamesList, validTxnList);
- } catch (Exception e) {
- LOG.info("Population of valid write id list cache failed, will be done later in query.");
- }
- }
-
- /**
- * Populates the cache for the given table pairs associated with a viewName.
- * If the table names provided match the table names that we think are associated
- * with the view, we just return, because we presume that they have already been
- * populated via the "populateCache" method. If they are different, we populate the cache
- * with the new associated tables and change our associated tables for the view.
- */
- public void populateCacheForView(List<Pair<String, String>> tables, HiveConf conf,
- HiveTxnManager txnMgr, String dbName, String viewName) {
- if (!conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_VIEW_CACHE_ENABLED)) {
- return;
- }
-
- String completeViewName = dbName + "." + viewName;
- LOG.debug("Found view while parsing: " + completeViewName);
- Set<String> underlyingTablesAndViews = getUniqueNames(tables);
- // If the tables passed in match our cache, assume the cache has already
- // been populated.
- if (underlyingTablesAndViews.equals(underlyingTableHints.getIfPresent(completeViewName))) {
- LOG.debug("View already cached.");
- return;
- }
- // populate the metastore cache for this view.
- populateCache(tables, conf, txnMgr);
- // cache the names of the tables for the given view in our server-wide cache.
- underlyingTableHints.put(completeViewName, underlyingTablesAndViews);
- }
-
- /**
- * Return all the unique views and tables given a list of tables. We iterate
- * through all the tables passed in. If it is a table in the list, we add the
- * table to our unique list. If it is a view, we add what we think are the
- * tables and views used by the view and iterate through those.
- *
- * In order to generate a unique list, we need to add the dbname (from the SessionState)
- * if the db was not provided in the query from the user.
- */
- private Set<String> getAllUniqueViewsAndTables(List<Pair<String, String>>
tables) {
- Set<String> fullTableNames = new HashSet<>();
- Queue<Pair<String, String>> queue = new LinkedList<>(tables);
- LOG.debug("Getting all tables.");
- while (queue.peek() != null) {
- String tableOrView = getTableName(queue.remove());
- LOG.debug("Getting table " + tableOrView);
- Set<String> underlyingTables =
underlyingTableHints.getIfPresent(tableOrView);
- if (underlyingTables != null) {
- // don't process the same table twice.
- if (fullTableNames.contains(tableOrView)) {
- continue;
- }
- // it's a view.
- LOG.debug("View in cache, adding its tables to queue.");
- for (String viewTable : underlyingTables) {
- LOG.debug("View table is " + viewTable);
- String[] dbTableArray = viewTable.split("\\.");
- Preconditions.checkNotNull(dbTableArray[0]);
- Preconditions.checkNotNull(dbTableArray[1]);
- queue.offer(new ImmutablePair<String, String>(dbTableArray[0], dbTableArray[1]));
- }
- }
- // We'll fetch validWriteIdList information whether it's a view or a table.
- fullTableNames.add(tableOrView);
- }
- return fullTableNames;
- }
-
- /**
- * Get the unique names from a table list. The list may contain some cases where
- * both the dbname and tablename are provided and some cases where the dbname is
- * null, in which case we need to grab the dbname from the SessionState.
- */
- public static Set<String> getUniqueNames(List<Pair<String, String>> tables) {
- Set<String> names = new HashSet<>();
- for (Pair<String, String> table : tables) {
- names.add(getTableName(table));
- }
- return names;
- }
-
- /**
- * Get the table name from a dbname/tablename pair. If the dbname is
- * null, use the SessionState to provide the dbname.
- */
- private static String getTableName(Pair<String, String> dbTablePair) {
- String dbName = dbTablePair.getLeft() == null
- ? SessionState.get().getCurrentDatabase() : dbTablePair.getLeft();
- return dbName + "." + dbTablePair.getRight();
- }
-}
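
For readers skimming the removal: the heart of the deleted class was a best-effort, server-wide hint cache mapping a qualified view name to the set of tables it was last seen reading, so the next query could batch one getValidWriteIds call for all of them. A condensed sketch of that pattern, using the same Caffeine API the removed code used (the class and method names below are illustrative, not part of Hive):

    import java.util.Set;

    import com.github.benmanes.caffeine.cache.Cache;
    import com.github.benmanes.caffeine.cache.Caffeine;

    public class ViewHintCacheSketch {
        // Best guess of each view's underlying tables; staleness only costs one extra HMS call.
        private static final Cache<String, Set<String>> underlyingTableHints =
            Caffeine.newBuilder().maximumSize(10000).build();

        // Returns null when the view has not been seen yet (the getIfPresent contract).
        static Set<String> hintsFor(String qualifiedViewName) {
            return underlyingTableHints.getIfPresent(qualifiedViewName);
        }

        // Records the tables actually observed under the view for the next query to prefetch.
        static void remember(String qualifiedViewName, Set<String> underlyingTables) {
            underlyingTableHints.put(qualifiedViewName, underlyingTables);
        }
    }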
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 5150de01baa..19c780250eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2458,8 +2458,7 @@ protected Set<TableName> getTablesUsed(RelNode plan) {
new RelVisitor() {
@Override
public void visit(RelNode node, int ordinal, RelNode parent) {
- if (node instanceof TableScan) {
- TableScan ts = (TableScan) node;
+ if (node instanceof TableScan ts) {
Table table = ((RelOptHiveTable) ts.getTable()).getHiveTableMD();
if (AcidUtils.isTransactionalTable(table) ||
table.isNonNative() &&
table.getStorageHandler().areSnapshotsSupported()) {
@@ -5112,7 +5111,7 @@ private ImmutableMap<String, Integer> buildHiveColNameToInputPosMap(
return hiveColNameToInputPosMapBuilder.build();
}
- private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
+ private QBParseInfo getQBParseInfo(QB qb) {
return qb.getParseInfo();
}
}
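
The TableScan hunk above switches to pattern matching for instanceof (a standard Java language feature since Java 16), which folds the type test and the cast into a single binding. A self-contained illustration of the idiom:

    public class InstanceofPatternDemo {
        public static void main(String[] args) {
            Object node = "stand-in for a RelNode";

            // Before: test, then an explicit cast on a separate line.
            if (node instanceof String) {
                String s = (String) node;
                System.out.println(s.length());
            }

            // After: the pattern variable s is bound and scoped to the true branch.
            if (node instanceof String s) {
                System.out.println(s.length());
            }
        }
    }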
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveTableName.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveTableName.java
index d0a723669ff..20965b0b2ad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveTableName.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveTableName.java
@@ -45,14 +45,12 @@ public static TableName of(Table table) throws SemanticException {
* Set a @{@link Table} object's table and db names based on the provided string.
* @param dbTable the dbtable string
* @param table the table to update
- * @return the table
* @throws SemanticException
*/
- public static Table setFrom(String dbTable, Table table) throws SemanticException{
+ public static void setFrom(String dbTable, Table table) throws SemanticException{
TableName name = ofNullable(dbTable);
table.setTableName(name.getTable());
table.setDbName(name.getDb());
- return table;
}
/**
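
The setFrom change is safe because the method mutates its Table argument in place and no caller consumed the returned reference, so the signature is simply narrowed to void. A simplified sketch of the call-site effect (the Table below is a bare stand-in with just the two setters; the real method resolves names via TableName.ofNullable and can throw SemanticException):

    public class SetFromDemo {
        // Hypothetical stand-in for org.apache.hadoop.hive.ql.metadata.Table.
        static class Table {
            String dbName, tableName;
            void setDbName(String db) { dbName = db; }
            void setTableName(String t) { tableName = t; }
        }

        // Mirrors the new void signature: parse "db.table" and mutate the argument.
        static void setFrom(String dbTable, Table table) {
            String[] parts = dbTable.split("\\.", 2);
            table.setDbName(parts[0]);
            table.setTableName(parts[1]);
        }

        public static void main(String[] args) {
            Table t = new Table();
            setFrom("db1.t1", t);  // previously: t = setFrom("db1.t1", t); the assignment was a no-op
            System.out.println(t.dbName + "." + t.tableName);  // db1.t1
        }
    }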
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1a996268b69..c10e55b2d37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -87,7 +87,6 @@
import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
import org.apache.hadoop.hive.common.StringInternUtils;
import org.apache.hadoop.hive.common.TableName;
-import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
import org.apache.hadoop.hive.conf.Constants;
@@ -363,7 +362,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
private final Map<TableScanOperator, Map<String, ExprNodeDesc>>
opToPartToSkewedPruner;
private Map<SelectOperator, Table> viewProjectToTableSchema;
private Operator<? extends OperatorDesc> sinkOp;
- private final CacheTableHelper cacheTableHelper = new CacheTableHelper();
/**
* a map for the split sampling, from alias to an instance of SplitSample
@@ -2883,8 +2881,7 @@ private void replaceViewReferenceWithDefinition(QB qb, Table tab,
String viewText = tab.getViewExpandedText();
TableMask viewMask = new TableMask(this, conf, false);
viewTree = ParseUtils.parse(viewText, ctx, tab.getCompleteName());
- cacheTableHelper.populateCacheForView(ctx.getParsedTables(), conf,
- getTxnMgr(), tab.getDbName(), tab.getTableName());
+
if (viewMask.isEnabled() && analyzeRewrite == null) {
ParseResult parseResult = rewriteASTWithMaskAndFilter(viewMask, viewTree,
ctx.getViewTokenRewriteStream(viewFullyQualifiedName),
@@ -13163,16 +13160,13 @@ void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticE
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.GENERATE_RESOLVED_PARSETREE);
// 1. Generate Resolved Parse tree from syntax tree
boolean needsTransform = needsTransform();
- //change the location of position alias process here
+ // change the location of position alias process here
processPositionAlias(ast);
- cacheTableHelper.populateCache(ctx.getParsedTables(), conf, getTxnMgr());
PlannerContext plannerCtx = pcf.get();
if (!genResolvedParseTree(ast, plannerCtx)) {
return;
}
- if (tablesFromReadEntities(inputs).stream().anyMatch(AcidUtils::isTransactionalTable)) {
- queryState.getValidTxnList();
- }
+ openTxnAndSetValidTxnList();
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
for (String alias : qb.getSubqAliases()) {
@@ -15164,39 +15158,48 @@ private String getQueryStringForCache(ASTNode ast) {
.toString(RESULTS_CACHE_KEY_TOKEN_REWRITE_PROGRAM,
ast.getTokenStartIndex(), ast.getTokenStopIndex());
}
- private ValidTxnWriteIdList getQueryValidTxnWriteIdList() throws SemanticException {
+ private void openTxnAndSetValidTxnList() throws SemanticException {
+ if (tablesFromReadEntities(inputs).stream().noneMatch(AcidUtils::isTransactionalTable)
+ || SessionState.get().isCompaction()) {
+ return;
+ }
+ if (queryState.getHMSCache() != null) {
+ // this step primes the cache containing the validTxnWriteIdList. It will fetch
+ // all the tables into the MetaStore Client cache with one HMS call.
+ getValidTxnWriteIdList();
+ } else {
+ queryState.getValidTxnList();
+ }
+ }
+
+ private ValidTxnWriteIdList getValidTxnWriteIdList() throws SemanticException {
// TODO: Once HIVE-18948 is in, should be able to retrieve writeIdList from the conf.
- //cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf);
+ // cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf);
//
- List<String> transactionalTables = tablesFromReadEntities(inputs)
- .stream()
- .filter(AcidUtils::isTransactionalTable)
- .map(Table::getFullyQualifiedName)
- .collect(Collectors.toList());
-
- if (transactionalTables.size() > 0) {
- String txnString = queryState.getValidTxnList();
- if (txnString == null) {
- return null;
- }
- try {
- return getTxnMgr().getValidWriteIds(transactionalTables, txnString);
- } catch (Exception err) {
- String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables
- + " and validTxnList " + conf.get(ValidTxnList.VALID_TXNS_KEY);
- throw new SemanticException(msg, err);
- }
- }
+ var transactionalTables = tablesFromReadEntities(inputs)
+ .stream()
+ .filter(AcidUtils::isTransactionalTable)
+ .map(Table::getFullyQualifiedName)
+ .toList();
- // No transactional tables.
- return null;
+ if (transactionalTables.isEmpty()) {
+ return null;
+ }
+ String txnString = queryState.getValidTxnList();
+ try {
+ return getTxnMgr().getValidWriteIds(transactionalTables, txnString);
+ } catch (Exception err) {
+ String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables
+ + " and validTxnList " + txnString;
+ throw new SemanticException(msg, err);
+ }
}
private QueryResultsCache.LookupInfo createLookupInfoForQuery(ASTNode astNode) throws SemanticException {
QueryResultsCache.LookupInfo lookupInfo = null;
String queryString = getQueryStringForCache(astNode);
if (queryString != null) {
- ValidTxnWriteIdList writeIdList = getQueryValidTxnWriteIdList();
+ ValidTxnWriteIdList writeIdList = getValidTxnWriteIdList();
Set<Long> involvedTables = tablesFromReadEntities(inputs).stream()
.map(Table::getTTable)
.map(org.apache.hadoop.hive.metastore.api.Table::getId)
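
The net effect of the SemanticAnalyzer changes: after genResolvedParseTree, the analyzer does nothing unless the query reads a transactional table outside of compaction; with an HMS query cache present it issues one batched getValidWriteIds call to prime the cache, and otherwise it only materializes the validTxnList. A stripped-down sketch of that decision (the interface below is a stand-in for HiveTxnManager, not its real signature):

    import java.util.List;

    public class TxnListPrimingSketch {
        interface TxnManager {  // stand-in for HiveTxnManager
            Object getValidWriteIds(List<String> fullTableNames, String validTxnList) throws Exception;
        }

        static void openTxnAndSetValidTxnList(boolean readsTransactionalTable, boolean isCompaction,
                boolean hmsCachePresent, List<String> txnTables, String validTxnList, TxnManager txnMgr)
                throws Exception {
            if (!readsTransactionalTable || isCompaction) {
                return;  // no ACID snapshot needed
            }
            if (hmsCachePresent) {
                // One batched HMS round trip warms the client cache for every table at once;
                // later per-table validWriteIdList lookups are then served locally.
                txnMgr.getValidWriteIds(txnTables, validTxnList);
            }
            // Without a cache, the real code only resolves queryState.getValidTxnList().
        }
    }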
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TableHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableHelper.java
new file mode 100644
index 00000000000..9ccc2b2c198
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableHelper.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+public class TableHelper {
+ /**
+ * Get the unique names from a table list. The list may contain some cases where
+ * both the dbname and tablename are provided and some cases where the dbname is
+ * null, in which case we need to grab the dbname from the SessionState.
+ */
+ public static Set<String> getUniqueNames(List<Pair<String, String>> tables) {
+ return tables.stream().map(TableHelper::getTableName)
+ .collect(Collectors.toSet());
+ }
+
+ /**
+ * Get the table name from a dbname/tablename pair. If the dbname is
+ * null, use the SessionState to provide the dbname.
+ */
+ private static String getTableName(Pair<String, String> dbTablePair) {
+ String dbName = dbTablePair.getLeft() == null ?
+ SessionState.get().getCurrentDatabase() : dbTablePair.getLeft();
+ return TableName.getDbTable(dbName, dbTablePair.getRight());
+ }
+}
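
A quick usage sketch of the new helper. Within a Hive session, a null dbname falls back to SessionState's current database ("default" is assumed below purely for illustration), and duplicates collapse in the returned set:

    import java.util.List;
    import java.util.Set;

    import org.apache.commons.lang3.tuple.Pair;
    import org.apache.hadoop.hive.ql.parse.TableHelper;

    public class TableHelperDemo {
        public static void main(String[] args) {
            List<Pair<String, String>> parsed = List.of(
                Pair.of("sales", "orders"),
                Pair.of(null, "orders"),        // dbname omitted in the query text
                Pair.of("sales", "orders"));    // duplicate, deduplicated by the set
            // In a session whose current database is "default", this yields
            // {"sales.orders", "default.orders"}.
            Set<String> names = TableHelper.getUniqueNames(parsed);
            System.out.println(names);
        }
    }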