nsivarajan commented on code in PR #63067: URL: https://github.com/apache/doris/pull/63067#discussion_r3297035007
########## fe/fe-core/src/main/java/org/apache/doris/statistics/query/QueryStatsRecorder.java: ########## @@ -0,0 +1,212 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics.query; + +import org.apache.doris.analysis.StatementBase; +import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.common.Config; +import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.glue.LogicalPlanAdapter; +import org.apache.doris.nereids.trees.expressions.ExprId; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.commands.Command; +import org.apache.doris.nereids.trees.plans.physical.PhysicalFilter; +import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterializeOlapScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; +import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; +import org.apache.doris.qe.ConnectContext; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.HashMap; +import java.util.Map; + +/** + * Records column-level query-hit and filter-hit statistics from the Nereids physical plan. + * Called once per query in NereidsPlanner after plan translation. + * + * <p>Scope (Part 1): + * <ul> + * <li>queryHit: base SELECT columns whose ExprId flows straight through to the root + * plan's output without rewriting. Columns hidden by an alias, an expression, + * or an aggregate function are NOT recorded yet (Part 2).</li> + * <li>filterHit: columns referenced in WHERE predicate conjuncts.</li> + * <li>Only OlapTable scans are recorded; external tables (Hive, Iceberg, JDBC, …) are not.</li> + * <li>DML, EXPLAIN, and internal queries (e.g. auto-analyze) are skipped.</li> + * <li>Per query, each table's count is incremented at most once regardless of scan count.</li> + * </ul> + * GROUP BY, ORDER BY, window, JOIN, and aliased/projected columns are deferred to Part 2. + */ +public class QueryStatsRecorder { + private static final Logger LOG = LogManager.getLogger(QueryStatsRecorder.class); + + private QueryStatsRecorder() {} + + public static void record(PhysicalPlan plan, StatementContext stmtContext) { + if (!shouldRecord(stmtContext)) { + return; + } + if (stmtContext.isQueryStatsRecorded()) { + return; + } + // Set the latch before the work so a partial-failure retry does not double-count. + stmtContext.markQueryStatsRecorded(); + try { + Map<String, StatsDelta> deltas = collectDeltas(plan); + for (StatsDelta delta : deltas.values()) { + if (!delta.empty()) { + try { + Env.getCurrentEnv().getQueryStats().addStats(delta); + } catch (Exception e) { + ConnectContext cc = stmtContext.getConnectContext(); + String queryId = (cc != null && cc.queryId() != null) + ? cc.queryId().toString() : "unknown"; + LOG.warn("Failed to record query stats for query={}", queryId, e); + } + } + } + } catch (Exception e) { + ConnectContext cc = stmtContext.getConnectContext(); + String queryId = (cc != null && cc.queryId() != null) + ? cc.queryId().toString() : "unknown"; + LOG.warn("Failed to build query stats deltas for query={}", queryId, e); + } + } + + /** + * Builds the per-table StatsDelta map from the physical plan. + * Package-private so unit tests can verify recording logic without touching Env. + */ + static Map<String, StatsDelta> collectDeltas(PhysicalPlan plan) { + Map<ExprId, PhysicalOlapScan> exprIdToScan = new HashMap<>(); + Map<String, StatsDelta> deltas = new HashMap<>(); + walkPlan(plan, exprIdToScan, deltas); + if (exprIdToScan.isEmpty()) { + return deltas; + } + for (Slot slot : plan.getOutput()) { + if (!(slot instanceof SlotReference)) { + continue; + } + SlotReference sr = (SlotReference) slot; + PhysicalOlapScan sourceScan = exprIdToScan.get(sr.getExprId()); + if (sourceScan == null) { + continue; + } + StatsDelta delta = getOrCreateDelta(deltas, sourceScan); + if (delta != null) { + sr.getOriginalColumn().ifPresent(col -> delta.addQueryStats(col.getName())); + } + } + return deltas; + } + + // Package-private for testing. + static boolean shouldRecord(StatementContext ctx) { + if (!Config.enable_query_hit_stats) { + return false; + } + ConnectContext connectContext = ctx.getConnectContext(); + if (connectContext != null && connectContext.getState().isInternal()) { + return false; + } + StatementBase stmt = ctx.getParsedStatement(); + if (stmt == null || stmt.isExplain()) { + return false; + } + // isInsert guards INSERT INTO … SELECT: parsedStmt may be the SELECT sub-plan, + // not the INSERT Command, when NereidsPlanner re-enters for execution planning. + if (ctx.isInsert()) { + return false; + } + if (stmt instanceof LogicalPlanAdapter + && ((LogicalPlanAdapter) stmt).getLogicalPlan() instanceof Command) { + return false; + } + return true; + } + + /** + * Single-pass tree walk: registers scan output slots into exprIdToScan, + * and records filterHit for PhysicalFilter conjuncts. + * Children are visited before the current node so scans are registered + * before parent filters look them up. + * PhysicalLazyMaterializeOlapScan is checked before PhysicalOlapScan + * because it is a subclass; the inner scan's metadata must be used. + */ + private static void walkPlan(Plan plan, + Map<ExprId, PhysicalOlapScan> exprIdToScan, + Map<String, StatsDelta> deltas) { + if (plan instanceof PhysicalLazyMaterializeOlapScan) { + PhysicalOlapScan inner = + ((PhysicalLazyMaterializeOlapScan) plan).getScan(); + for (Slot slot : plan.getOutput()) { Review Comment: this is addressed in Part 2 (which uses the correct PhysicalDeferMaterializeOlapScan class and handles the full lazy materialization lifecycle properly). Part2 plan: Alias resolution, GROUP BY queryHit, ORDER BY queryHit, Window PARTITION BY / ORDER BY queryHit and Lazy scan -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
