englefly commented on code in PR #64535: URL: https://github.com/apache/doris/pull/64535#discussion_r3432905621
########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MetaPathNormalizer.java: ########## @@ -0,0 +1,702 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.analysis.ColumnAccessPathType; +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.rules.rewrite.NestedColumnPruning.DataTypeAccessTree; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.types.ArrayType; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.MapType; +import org.apache.doris.nereids.types.StructField; +import org.apache.doris.nereids.types.StructType; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Multimap; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Optional; + +/** + * Normalizes and strips redundant metadata-only (NULL/OFFSET) access paths + * so that BE readers receive a consistent, conflict-free set of paths. + * + * <p>Single entry point: {@link #normalizeAndStrip(Slot, DataTypeAccessTree, Multimap, Multimap)}. + */ +public final class MetaPathNormalizer { + + private MetaPathNormalizer() {} + + /** + * Normalize map value meta-only access paths, then strip redundant NULL/OFFSET + * paths. These two steps must always run together: normalize rewrites + * {@code [m, *, META]} into {@code [m, KEYS] + [m, VALUES, META]}, and strip + * removes paths that are covered by deeper or higher-priority paths. + */ + public static void normalizeAndStrip( + Slot slot, DataTypeAccessTree accessTree, + Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> targetAccessPaths, + Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> coveringAccessPaths) { + normalizeMapValueMetaOnlyAccessPaths(slot, accessTree, targetAccessPaths); + stripCoveredMetaPaths(slot, targetAccessPaths, coveringAccessPaths); + } + + // ======================================================================== + // isMetaPath + // ======================================================================== + + private static boolean isMetaPath(List<String> path) { + if (path.isEmpty()) { + return false; + } + String lastComponent = path.get(path.size() - 1); + return AccessPathInfo.ACCESS_NULL.equals(lastComponent) + || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent); + } + + // ======================================================================== + // Normalize: map value meta-only access paths + // ======================================================================== + + private static void normalizeMapValueMetaOnlyAccessPaths( + Slot slot, DataTypeAccessTree accessTree, + Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> accessPaths) { + int slotId = slot.getExprId().asInt(); + normalizeMapValueMetaPathHelper(slotId, + accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_OFFSET), + accessPaths, AccessPathInfo.ACCESS_OFFSET); + normalizeMapValueMetaPathHelper(slotId, + accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_NULL), + accessPaths, AccessPathInfo.ACCESS_NULL); + } + + /** + * Normalize map value meta-only (OFFSET or NULL) star access paths for a single meta type. + * <p>For each map prefix where {@code [prefix, *, META]} exists, replaces it with + * {@code [prefix, KEYS]} plus {@code [prefix, VALUES, META]}, so that keys are read fully + * for element lookup while values only read the metadata. + */ + private static void normalizeMapValueMetaPathHelper( + int slotId, List<List<String>> mapPrefixes, + Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> accessPaths, + String metaSuffix) { + if (mapPrefixes.isEmpty()) { + return; + } + + Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths = accessPaths.get(slotId); + List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new ArrayList<>(); + List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new ArrayList<>(); + for (List<String> mapPrefix : mapPrefixes) { + List<String> starMetaPath = new ArrayList<>(mapPrefix); + starMetaPath.add(AccessPathInfo.ACCESS_ALL); + starMetaPath.add(metaSuffix); + + for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) { + if (!p.second.equals(starMetaPath)) { + continue; + } + pathsToRemove.add(p); + + List<String> keysPath = new ArrayList<>(mapPrefix); + keysPath.add(AccessPathInfo.ACCESS_MAP_KEYS); + pathsToAdd.add(Pair.of(p.first, keysPath)); + + List<String> valuesMetaPath = new ArrayList<>(mapPrefix); + valuesMetaPath.add(AccessPathInfo.ACCESS_MAP_VALUES); + valuesMetaPath.add(metaSuffix); + pathsToAdd.add(Pair.of(p.first, valuesMetaPath)); + } + } + slotPaths.removeAll(pathsToRemove); + slotPaths.addAll(pathsToAdd); + } + + // ======================================================================== + // Strip: remove redundant meta paths + // ======================================================================== + + /** + * Strip redundant metadata-only NULL/OFFSET paths, keeping enough real paths for BE + * readers to avoid OFFSET_ONLY / NULL_MAP_ONLY modes that skip required child data. + * + * <p>Stripping is organised in two levels: + * + * <p><b>Level 1 — Same-prefix priority:</b> when two paths target the same + * column/subcolumn and differ only in the suffix, the higher-priority suffix + * eliminates the lower. + * <pre>{@code + * [prefix] > [prefix, OFFSET] > [prefix, NULL] + * }</pre> + * <ul> + * <li>{@code [prefix]} strips {@code [prefix, OFFSET]} and + * {@code [prefix, NULL]} — full access covers all metadata.</li> + * <li>{@code [prefix, OFFSET]} strips {@code [prefix, NULL]} — offset read + * provides null information for variable-length columns.</li> + * </ul> + * + * <p><b>Level 2 — Deeper-prefix coverage:</b> when a covering path has a prefix + * that strictly contains the target path's prefix, traversing the deeper container + * already materialises the container, making the shorter-prefix meta path redundant. + * <ul> + * <li>Target suffix {@code OFFSET}, covered by deeper prefix: + * <ul> + * <li>{@code Data}: {@code [a, *, field]} strips {@code [a, OFFSET]}.</li> + * <li>{@code OFFSET}: {@code [a, *, OFFSET]} strips {@code [a, OFFSET]}.</li> + * <li>{@code NULL}: {@code [a, *, NULL]} strips {@code [a, OFFSET]}.</li> + * </ul> + * </li> + * <li>Target suffix {@code NULL}, covered by deeper prefix: + * <ul> + * <li>{@code Data}: {@code [a, b, c]} strips {@code [a, b, NULL]}.</li> + * <li>{@code OFFSET}: {@code [a, *, OFFSET]} strips {@code [a, NULL]}.</li> + * <li>{@code NULL}: {@code [a, *, NULL]} strips {@code [a, NULL]}.</li> + * </ul> + * </li> + * </ul> + * + * <p>Map OFFSET stripping may need supplemental key paths (e.g. + * {@code [m, *, OFFSET]} + {@code [m, VALUES]} becomes {@code [m, KEYS]} + + * {@code [m, VALUES]}), handled via + * {@link #compareMetaPathPrefixCoverage} and {@link #buildMapKeysOnlyPath}. + */ + private static void stripCoveredMetaPaths( + Slot slot, Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> targetAccessPaths, + Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> coveringAccessPaths) { + //stripCoveredMetaPaths + // Level 1: same-prefix priority — [prefix] > [prefix, OFFSET] > [prefix, NULL] + // ├── stripExactPrefixCoveredMetaPaths full access covers both + // └── stripNullBySamePrefixOffset OFFSET covers NULL + // + // Level 2: deeper-prefix coverage + // ├── stripMetaPathsByDeeperPrefix(OFFSET) deeper data/meta → OFFSET + // └── stripMetaPathsByDeeperPrefix(NULL) deeper data/meta → NULL + // + // Special + // └── stripCoveredArrayNullMetaPaths array NULL + map key supplementation + + stripExactPrefixCoveredMetaPaths(slot, targetAccessPaths, coveringAccessPaths); + stripNullBySamePrefixOffset(slot, targetAccessPaths); + + stripMetaPathsByDeeperPrefix(slot, AccessPathInfo.ACCESS_OFFSET, + targetAccessPaths, coveringAccessPaths); + stripMetaPathsByDeeperPrefix(slot, AccessPathInfo.ACCESS_NULL, + targetAccessPaths, coveringAccessPaths); + + stripCoveredArrayNullMetaPaths(slot, targetAccessPaths, coveringAccessPaths); + } + + /** + * Level 1 — same-prefix OFFSET strips NULL: {@code [prefix, OFFSET]} strips + * {@code [prefix, NULL]}. Uses type-aware comparison so that map-level + * {@code *}/VALUES equivalence is recognized + * (e.g. {@code [m, *, *, OFFSET]} strips {@code [m, VALUES, *, NULL]}). + */ + private static void stripNullBySamePrefixOffset( + Slot slot, Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> allAccessPaths) { + int slotId = slot.getExprId().asInt(); + Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths = allAccessPaths.get(slotId); + if (slotPaths.isEmpty()) { + return; + } + + List<Pair<ColumnAccessPathType, List<String>>> toRemove = new ArrayList<>(); + for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) { + List<String> path = p.second; + if (path.isEmpty() || !AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) { + continue; + } + List<String> prefix = path.subList(0, path.size() - 1); + for (Pair<ColumnAccessPathType, List<String>> q : slotPaths) { + List<String> other = q.second; + if (other == path || other.isEmpty()) { + continue; + } + if (other.size() != path.size() + || !AccessPathInfo.ACCESS_OFFSET.equals(other.get(other.size() - 1))) { + continue; + } + List<String> otherPrefix = other.subList(0, other.size() - 1); + OffsetPathRewrite rewrite = compareMetaPathPrefixCoverage( + slot.getDataType(), prefix, otherPrefix); + if (rewrite.shouldRemoveOffsetPath()) { + toRemove.add(p); + break; + } + } + } + for (Pair<ColumnAccessPathType, List<String>> r : toRemove) { + allAccessPaths.remove(slotId, r); + } + } + + /** + * Decide whether an OFFSET-suffix path can be removed because another path already covers + * the same container. + */ + private static OffsetPathRewrite analyzeOffsetPathRewrite( + DataType slotType, List<String> path, List<List<String>> coveringPaths) { + if (path.isEmpty() + || !AccessPathInfo.ACCESS_OFFSET.equals(path.get(path.size() - 1))) { + return OffsetPathRewrite.keep(); + } + List<String> prefix = path.subList(0, path.size() - 1); + List<List<String>> filteredCoveringPaths = new ArrayList<>(); + for (List<String> p : coveringPaths) { + if (!p.equals(path)) { + filteredCoveringPaths.add(p); + } + } + return analyzePrefixCoverage(slotType, prefix, filteredCoveringPaths); + } + + private static OffsetPathRewrite analyzePrefixCoverage( + DataType slotType, List<String> prefix, List<List<String>> coveringPaths) { + List<List<String>> supplementalPaths = new ArrayList<>(); + for (List<String> coveringPath : coveringPaths) { + OffsetPathRewrite candidate = compareMetaPathPrefixCoverage(slotType, prefix, coveringPath); + if (!candidate.shouldRemoveOffsetPath()) { + continue; + } + if (candidate.getSupplementalPaths().isEmpty()) { + return OffsetPathRewrite.remove(); + } + supplementalPaths.addAll(candidate.getSupplementalPaths()); + } + if (supplementalPaths.isEmpty()) { + return OffsetPathRewrite.keep(); + } + return OffsetPathRewrite.rewriteWithSupplementalPaths(supplementalPaths); + } + + /** + * Level 2 — strip target paths ending with {@code metaSuffix} + * (OFFSET or NULL) when a deeper-prefix path covers them. + * + * <p>Data covering paths collected from both {@code coveringAccessPaths} + * and {@code targetAccessPaths} are compared against target prefixes via + * {@link #compareMetaPathPrefixCoverage}. OFFEST targets additionally + * use {@link #stripCoveredOffsetByPaths} for map key supplementation. + * + * <p>Meta covering paths (OFFSET/NULL) are collected from both sources + * and delegated to {@link #stripCoveredMetaByPrefix}. + */ + private static void stripMetaPathsByDeeperPrefix( + Slot slot, String metaSuffix, + Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> targetAccessPaths, + Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> coveringAccessPaths) { + int slotId = slot.getExprId().asInt(); + Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths = targetAccessPaths.get(slotId); + if (targetPaths.isEmpty()) { + return; + } + + // Collect data (non-meta) paths from both covering and target. + List<List<String>> dataPaths = new ArrayList<>(); + for (Pair<ColumnAccessPathType, List<String>> p : coveringAccessPaths.get(slotId)) { + if (!p.second.isEmpty() && !isMetaPath(p.second)) { + dataPaths.add(p.second); + } + } + for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) { + if (!p.second.isEmpty() && !isMetaPath(p.second)) { + dataPaths.add(p.second); + } + } + + if (AccessPathInfo.ACCESS_OFFSET.equals(metaSuffix)) { + // OFFSET: use stripCoveredOffsetByPaths for map key supplementation. + stripCoveredOffsetByPaths(slot, targetAccessPaths, dataPaths); + } else { + // NULL: inline check — remove NULL targets covered by data paths. + List<Pair<ColumnAccessPathType, List<String>>> toRemove = new ArrayList<>(); + for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) { + List<String> path = p.second; + if (path.isEmpty() || !AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) { + continue; + } + List<String> prefix = path.subList(0, path.size() - 1); + for (List<String> other : dataPaths) { + if (other.size() >= prefix.size() + && compareMetaPathPrefixCoverage( + slot.getDataType(), prefix, other) + .shouldRemoveOffsetPath()) { Review Comment: [m, VALUES] 削 [m, *, NULL] -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
