[
https://issues.apache.org/jira/browse/HIVE-25900?focusedWorklogId=717787&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-717787
]
ASF GitHub Bot logged work on HIVE-25900:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 31/Jan/22 07:22
Start Date: 31/Jan/22 07:22
Worklog Time Spent: 10m
Work Description: kasakrisz commented on a change in pull request #2984:
URL: https://github.com/apache/hive/pull/2984#discussion_r795396163
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
##########
@@ -139,109 +200,262 @@ public void init() {
}
}
- public void init(Hive db) {
+ private static void init(Hive db) {
final boolean dummy =
db.getConf().get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname)
.equals("DUMMY");
if (dummy) {
// Dummy registry does not cache information and forwards all requests
to metastore
- initialized.set(true);
+ SINGLETON = new MaterializedViewsRegistry() {};
LOG.info("Using dummy materialized views registry");
} else {
+ SINGLETON = new
InMemoryMaterializedViewsRegistry(HiveMaterializedViewsRegistry::createMaterialization);
// We initialize the cache
long period = HiveConf.getTimeVar(db.getConf(),
ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_REFRESH, TimeUnit.SECONDS);
+ if (period <= 0) {
+ return;
+ }
+
ScheduledExecutorService pool =
Executors.newSingleThreadScheduledExecutor(
new ThreadFactoryBuilder()
.setDaemon(true)
.setNameFormat("HiveMaterializedViewsRegistry-%d")
.build());
- pool.scheduleAtFixedRate(new Loader(db), 0, period, TimeUnit.SECONDS);
+
+ MaterializedViewObjects objects =
db::getAllMaterializedViewObjectsForRewriting;
+ pool.scheduleAtFixedRate(new Loader(db.getConf(), SINGLETON, objects),
0, period, TimeUnit.SECONDS);
}
}
- private class Loader implements Runnable {
- private final Hive db;
+ public interface MaterializedViewObjects {
+ List<Table> getAllMaterializedViewObjectsForRewriting() throws
HiveException;
+ }
- private Loader(Hive db) {
- this.db = db;
+ public static class Loader implements Runnable {
+ protected final HiveConf hiveConf;
+ protected final MaterializedViewsRegistry materializedViewsRegistry;
+ protected final MaterializedViewObjects materializedViewObjects;
+ /* Whether the cache has been initialized or not. */
+
+ Loader(HiveConf hiveConf,
+ MaterializedViewsRegistry materializedViewsRegistry,
+ MaterializedViewObjects materializedViewObjects) {
+ this.hiveConf = hiveConf;
+ this.materializedViewsRegistry = materializedViewsRegistry;
+ this.materializedViewObjects = materializedViewObjects;
}
@Override
public void run() {
- SessionState ss = new SessionState(db.getConf());
- ss.setIsHiveServerQuery(true); // All is served from HS2, we do not need
e.g. Tez sessions
- SessionState.start(ss);
- PerfLogger perfLogger = SessionState.getPerfLogger();
+ refresh();
+ }
+
+ public void refresh() {
+ PerfLogger perfLogger = getPerfLogger();
perfLogger.perfLogBegin(CLASS_NAME,
PerfLogger.MATERIALIZED_VIEWS_REGISTRY_REFRESH);
try {
- if (initialized.get()) {
- for (Table mvTable : db.getAllMaterializedViewObjectsForRewriting())
{
- RelOptMaterialization existingMV = getRewritingMaterializedView(
- mvTable.getDbName(), mvTable.getTableName(), ALL);
- if (existingMV != null) {
- // We replace if the existing MV is not newer
- Table existingMVTable =
HiveMaterializedViewUtils.extractTable(existingMV);
- if (existingMVTable.getCreateTime() < mvTable.getCreateTime() ||
- (existingMVTable.getCreateTime() == mvTable.getCreateTime()
&&
- existingMVTable.getMVMetadata().getMaterializationTime()
<= mvTable.getMVMetadata().getMaterializationTime())) {
- refreshMaterializedView(db.getConf(), existingMVTable,
mvTable);
- }
- } else {
- // Simply replace if it still does not exist
- refreshMaterializedView(db.getConf(), null, mvTable);
+ List<Table> materializedViewObjects =
this.materializedViewObjects.getAllMaterializedViewObjectsForRewriting();
+ for (Table mvTable : materializedViewObjects) {
+ RelOptMaterialization existingMV =
materializedViewsRegistry.getRewritingMaterializedView(
+ mvTable.getDbName(), mvTable.getTableName(), ALL);
+ if (existingMV != null) {
+ // We replace if the existing MV is not newer
+ Table existingMVTable =
HiveMaterializedViewUtils.extractTable(existingMV);
+ if (existingMVTable.getCreateTime() < mvTable.getCreateTime() ||
+ (existingMVTable.getCreateTime() ==
mvTable.getCreateTime() &&
+
existingMVTable.getMVMetadata().getMaterializationTime() <=
mvTable.getMVMetadata().getMaterializationTime())) {
+ materializedViewsRegistry.refreshMaterializedView(hiveConf,
existingMVTable, mvTable);
}
+ } else {
+ // Simply replace if it still does not exist
+ materializedViewsRegistry.refreshMaterializedView(hiveConf, null,
mvTable);
}
- LOG.info("Materialized views registry has been refreshed");
- } else {
- for (Table mvTable : db.getAllMaterializedViewObjectsForRewriting())
{
- refreshMaterializedView(db.getConf(), null, mvTable);
+ }
+
+ for (HiveRelOptMaterialization materialization :
materializedViewsRegistry.getRewritingMaterializedViews()) {
+ Table mvTableInCache =
HiveMaterializedViewUtils.extractTable(materialization);
+ Table mvTableInHMS = materializedViewObjects.stream()
+ .filter(table ->
table.getDbName().equals(mvTableInCache.getDbName())
+ &&
table.getTableName().equals(mvTableInCache.getTableName()))
+ .findAny()
+ .orElse(null);
+
+ if (mvTableInHMS == null) {
+ materializedViewsRegistry.dropMaterializedView(mvTableInCache);
}
- initialized.set(true);
- LOG.info("Materialized views registry has been initialized");
}
+
+ LOG.info("Materialized views registry has been refreshed");
} catch (HiveException e) {
- if (initialized.get()) {
- LOG.error("Problem connecting to the metastore when refreshing the
view registry", e);
- } else {
- LOG.error("Problem connecting to the metastore when initializing the
view registry", e);
- }
+ LOG.error("Problem connecting to the metastore when refreshing the
view registry", e);
}
perfLogger.perfLogEnd(CLASS_NAME,
PerfLogger.MATERIALIZED_VIEWS_REGISTRY_REFRESH);
}
+
+ private PerfLogger getPerfLogger() {
+ SessionState ss = new SessionState(hiveConf);
+ ss.setIsHiveServerQuery(true); // All is served from HS2, we do not need
e.g. Tez sessions
+ SessionState.start(ss);
+ return SessionState.getPerfLogger();
+ }
Review comment:
This was added by https://issues.apache.org/jira/browse/HIVE-21344 and I
think the intention was to monitor the refresh rate.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 717787)
Time Spent: 1h 50m (was: 1h 40m)
> Materialized view registry does not clean non existing views at refresh
> -----------------------------------------------------------------------
>
> Key: HIVE-25900
> URL: https://issues.apache.org/jira/browse/HIVE-25900
> Project: Hive
> Issue Type: Bug
> Components: Materialized views
> Reporter: Krisztian Kasa
> Assignee: Krisztian Kasa
> Priority: Major
> Labels: pull-request-available
> Time Spent: 1h 50m
> Remaining Estimate: 0h
>
> CBO plans of materialized views that are enabled for query rewrite are
> cached in HS2 (MaterializedViewsCache, HiveMaterializedViewsRegistry).
> The registry is refreshed periodically from HMS:
> {code:java}
> set hive.server2.materializedviews.registry.refresh.period=1500s;
> {code}
> This functionality is required when multiple HS2 instances are used in a
> cluster: an MV drop operation is served by one of the HS2 instances, and the
> registry is updated at that time in that instance. However, the other HS2
> instances still cache the non-existent view and need to be refreshed by the
> updater thread.
> Currently the updater thread adds new entries and refreshes existing ones, but
> does not remove the outdated entries.
--
This message was sent by Atlassian Jira
(v8.20.1#820001)