github-actions[bot] commented on code in PR #64535:
URL: https://github.com/apache/doris/pull/64535#discussion_r3433398415


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MetaPathNormalizer.java:
##########
@@ -0,0 +1,581 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.analysis.ColumnAccessPathType;
+import org.apache.doris.common.Pair;
+import 
org.apache.doris.nereids.rules.rewrite.NestedColumnPruning.DataTypeAccessTree;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.MapType;
+import org.apache.doris.nereids.types.StructField;
+import org.apache.doris.nereids.types.StructType;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Multimap;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * Normalizes and strips redundant metadata-only (NULL/OFFSET) access paths
+ * so that BE readers receive a consistent, conflict-free set of paths.
+ *
+ * <p>Single entry point: {@link #normalizeAndStrip(Slot, DataTypeAccessTree, 
Multimap, Multimap)}.
+ */
+public final class MetaPathNormalizer {
+
+    private MetaPathNormalizer() {}
+
+    /**
+     * Normalize map value meta-only access paths, then strip redundant 
NULL/OFFSET
+     * paths. These two steps must always run together: normalize rewrites
+     * {@code [m, *, META]} into {@code [m, KEYS] + [m, VALUES, META]}, and 
strip
+     * removes paths that are covered by deeper or higher-priority paths.
+     */
+    public static void normalizeAndStrip(
+            Slot slot, DataTypeAccessTree accessTree,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
targetAccessPaths,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
coveringAccessPaths) {
+        normalizeMapValueMetaOnlyAccessPaths(slot, accessTree, 
targetAccessPaths);
+        stripCoveredMetaPaths(slot, targetAccessPaths, coveringAccessPaths);
+    }
+
+    // ========================================================================
+    // isMetaPath
+    // ========================================================================
+
+    private static boolean isMetaPath(List<String> path) {
+        if (path.isEmpty()) {
+            return false;
+        }
+        String lastComponent = path.get(path.size() - 1);
+        return AccessPathInfo.ACCESS_NULL.equals(lastComponent)
+                || AccessPathInfo.ACCESS_OFFSET.equals(lastComponent);
+    }
+
+    private static List<List<String>> collectPaths(
+            Collection<Pair<ColumnAccessPathType, List<String>>> a,
+            Collection<Pair<ColumnAccessPathType, List<String>>> b, boolean 
meta) {
+        List<List<String>> result = new ArrayList<>();
+        for (Pair<ColumnAccessPathType, List<String>> p : a) {
+            if (!p.second.isEmpty() && isMetaPath(p.second) == meta) {
+                result.add(p.second);
+            }
+        }
+        for (Pair<ColumnAccessPathType, List<String>> p : b) {
+            if (!p.second.isEmpty() && isMetaPath(p.second) == meta) {
+                result.add(p.second);
+            }
+        }
+        return result;
+    }
+
+    // ========================================================================
+    // Normalize: map value meta-only access paths
+    // ========================================================================
+
+    private static void normalizeMapValueMetaOnlyAccessPaths(
+            Slot slot, DataTypeAccessTree accessTree,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
accessPaths) {
+        int slotId = slot.getExprId().asInt();
+        normalizeMapValueMetaPathHelper(slotId,
+                
accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_OFFSET),
+                accessPaths, AccessPathInfo.ACCESS_OFFSET);
+        normalizeMapValueMetaPathHelper(slotId,
+                
accessTree.collectMapValueMetaOnlyAccessPaths(AccessPathInfo.ACCESS_NULL),
+                accessPaths, AccessPathInfo.ACCESS_NULL);
+    }
+
+    /**
+     * Normalize map value meta-only (OFFSET or NULL) star access paths for a 
single meta type.
+     * <p>For each map prefix where {@code [prefix, *, META]} exists, replaces 
it with
+     * {@code [prefix, KEYS]} plus {@code [prefix, VALUES, META]}, so that 
keys are read fully
+     * for element lookup while values only read the metadata.
+     */
+    private static void normalizeMapValueMetaPathHelper(
+            int slotId, List<List<String>> mapPrefixes,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
accessPaths,
+            String metaSuffix) {
+        if (mapPrefixes.isEmpty()) {
+            return;
+        }
+
+        Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths = 
accessPaths.get(slotId);
+        List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new 
ArrayList<>();
+        List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new 
ArrayList<>();
+        for (List<String> mapPrefix : mapPrefixes) {
+            List<String> starMetaPath = new ArrayList<>(mapPrefix);
+            starMetaPath.add(AccessPathInfo.ACCESS_ALL);
+            starMetaPath.add(metaSuffix);
+
+            for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) {
+                if (!p.second.equals(starMetaPath)) {
+                    continue;
+                }
+                pathsToRemove.add(p);
+
+                List<String> keysPath = new ArrayList<>(mapPrefix);
+                keysPath.add(AccessPathInfo.ACCESS_MAP_KEYS);
+                pathsToAdd.add(Pair.of(p.first, keysPath));
+
+                List<String> valuesMetaPath = new ArrayList<>(mapPrefix);
+                valuesMetaPath.add(AccessPathInfo.ACCESS_MAP_VALUES);
+                valuesMetaPath.add(metaSuffix);
+                pathsToAdd.add(Pair.of(p.first, valuesMetaPath));
+            }
+        }
+        slotPaths.removeAll(pathsToRemove);
+        slotPaths.addAll(pathsToAdd);
+    }
+
+    // ========================================================================
+    // Strip: remove redundant meta paths
+    // ========================================================================
+
+    /**
+     * Strip redundant metadata-only NULL/OFFSET paths, keeping enough real 
paths for BE
+     * readers to avoid OFFSET_ONLY / NULL_MAP_ONLY modes that skip required 
child data.
+     *
+     * <p>Stripping is organised in two levels:
+     *
+     * <p><b>Level 1 — Same-prefix priority:</b> when two paths target the same
+     * column/subcolumn and differ only in the suffix, the higher-priority 
suffix
+     * eliminates the lower.
+     * <pre>{@code
+     *   [prefix]  >  [prefix, OFFSET]  >  [prefix, NULL]
+     * }</pre>
+     * <ul>
+     *   <li>{@code [prefix]} strips {@code [prefix, OFFSET]} and
+     *       {@code [prefix, NULL]} — full access covers all metadata.</li>
+     *   <li>{@code [prefix, OFFSET]} strips {@code [prefix, NULL]} — offset 
read
+     *       provides null information for variable-length columns.</li>
+     * </ul>
+     *
+     * <p><b>Level 2 — Deeper-prefix coverage:</b> when a covering path has a 
prefix
+     * that strictly contains the target path's prefix, traversing the deeper 
container
+     * already materialises the container, making the shorter-prefix meta path 
redundant.
+     * <ul>
+     *   <li>Target suffix {@code OFFSET}, covered by deeper prefix:
+     *     <ul>
+     *       <li>{@code Data}:   {@code [a, *, field]}  strips {@code [a, 
OFFSET]}.</li>
+     *       <li>{@code OFFSET}: {@code [a, *, OFFSET]} strips {@code [a, 
OFFSET]}.</li>
+     *       <li>{@code NULL}:   {@code [a, *, NULL]}   strips {@code [a, 
OFFSET]}.</li>
+     *     </ul>
+     *   </li>
+     *   <li>Target suffix {@code NULL}, covered by deeper prefix:
+     *     <ul>
+     *       <li>{@code Data}:   {@code [a, b, c]}      strips {@code [a, b, 
NULL]}.</li>
+     *       <li>{@code OFFSET}: {@code [a, *, OFFSET]} strips {@code [a, 
NULL]}.</li>
+     *       <li>{@code NULL}:   {@code [a, *, NULL]}   strips {@code [a, 
NULL]}.</li>
+     *     </ul>
+     *   </li>
+     * </ul>
+     *
+     * <p>Map OFFSET stripping may need supplemental key paths (e.g.
+     * {@code [m, *, OFFSET]} + {@code [m, VALUES]} becomes {@code [m, KEYS]} +
+     * {@code [m, VALUES]}), handled via
+     * {@link #compareMetaPathPrefixCoverage} and {@link 
#buildMapKeysOnlyPath}.
+     */
+    private static void stripCoveredMetaPaths(
+            Slot slot, Multimap<Integer, Pair<ColumnAccessPathType, 
List<String>>> targetAccessPaths,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
coveringAccessPaths) {
+        //stripCoveredMetaPaths
+        //    Level 1: same-prefix priority — [prefix] > [prefix, OFFSET] > 
[prefix, NULL]
+        //      ├── stripExactPrefixCoveredMetaPaths     full access covers 
both
+        //      └── stripNullBySamePrefixOffset          OFFSET covers NULL
+        //
+        //    Level 2: deeper-prefix coverage
+        //      ├── stripMetaPathsByDeeperPrefix(OFFSET) deeper data/meta → 
OFFSET
+        //      └── stripMetaPathsByDeeperPrefix(NULL)   deeper data/meta → 
NULL
+        stripExactPrefixCoveredMetaPaths(slot, targetAccessPaths, 
coveringAccessPaths);
+        stripNullBySamePrefixOffset(slot, targetAccessPaths);
+
+        stripMetaPathsByDeeperPrefix(slot, AccessPathInfo.ACCESS_OFFSET,
+                targetAccessPaths, coveringAccessPaths);
+        stripMetaPathsByDeeperPrefix(slot, AccessPathInfo.ACCESS_NULL,
+                targetAccessPaths, coveringAccessPaths);
+    }
+
+    /**
+     * Level 1 — same-prefix OFFSET strips NULL: {@code [prefix, OFFSET]} 
strips
+     * {@code [prefix, NULL]}. Uses type-aware comparison so that map-level
+     * {@code *}/VALUES equivalence is recognized
+     * (e.g. {@code [m, *, *, OFFSET]} strips {@code [m, VALUES, *, NULL]}).
+     */
+    private static void stripNullBySamePrefixOffset(
+            Slot slot, Multimap<Integer, Pair<ColumnAccessPathType, 
List<String>>> allAccessPaths) {
+        int slotId = slot.getExprId().asInt();
+        Collection<Pair<ColumnAccessPathType, List<String>>> slotPaths = 
allAccessPaths.get(slotId);
+        if (slotPaths.isEmpty()) {
+            return;
+        }
+
+        List<Pair<ColumnAccessPathType, List<String>>> toRemove = new 
ArrayList<>();
+        List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new 
ArrayList<>();
+        for (Pair<ColumnAccessPathType, List<String>> p : slotPaths) {
+            List<String> path = p.second;
+            if (path.isEmpty() || 
!AccessPathInfo.ACCESS_NULL.equals(path.get(path.size() - 1))) {
+                continue;
+            }
+            List<String> prefix = path.subList(0, path.size() - 1);
+            for (Pair<ColumnAccessPathType, List<String>> q : slotPaths) {
+                List<String> other = q.second;
+                if (other == path || other.isEmpty()) {
+                    continue;
+                }
+                if (other.size() != path.size()
+                        || 
!AccessPathInfo.ACCESS_OFFSET.equals(other.get(other.size() - 1))) {
+                    continue;
+                }
+                List<String> otherPrefix = other.subList(0, other.size() - 1);
+                OffsetPathRewrite rewrite = compareMetaPathPrefixCoverage(
+                        slot.getDataType(), prefix, otherPrefix);
+                if (rewrite.shouldRemoveOffsetPath()) {
+                    toRemove.add(p);
+                    for (List<String> supplementalPath : 
rewrite.getSupplementalPaths()) {
+                        pathsToAdd.add(Pair.of(p.first, supplementalPath));
+                    }
+                    break;
+                }
+            }
+        }
+        for (Pair<ColumnAccessPathType, List<String>> r : toRemove) {
+            allAccessPaths.remove(slotId, r);
+        }
+        allAccessPaths.putAll(slotId, pathsToAdd);
+    }
+
+    private static OffsetPathRewrite analyzePrefixCoverage(
+            DataType slotType, List<String> prefix, List<List<String>> 
dataPaths) {
+        List<List<String>> supplementalPaths = new ArrayList<>();
+        for (List<String> dataPath : dataPaths) {
+            OffsetPathRewrite candidate = 
compareMetaPathPrefixCoverage(slotType, prefix, dataPath);
+            if (!candidate.shouldRemoveOffsetPath()) {
+                continue;
+            }
+            if (candidate.getSupplementalPaths().isEmpty()) {
+                return OffsetPathRewrite.remove();
+            }
+            supplementalPaths.addAll(candidate.getSupplementalPaths());
+        }
+        if (supplementalPaths.isEmpty()) {
+            return OffsetPathRewrite.keep();
+        }
+        return 
OffsetPathRewrite.rewriteWithSupplementalPaths(supplementalPaths);
+    }
+
+    /**
+     * Level 2 — strip target paths ending with {@code metaSuffix}
+     * (OFFSET or NULL) when a deeper-prefix path covers them.
+     *
+     * <p>Data covering paths: delegated to
+     * {@link #stripMetaByDeeperDataPaths}.
+     *
+     * <p>Meta covering paths: collected from both sources and delegated
+     * to {@link #stripMetaByDeeperMetaPaths}.
+     */
+    private static void stripMetaPathsByDeeperPrefix(
+            Slot slot, String metaSuffix,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
targetAccessPaths,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
coveringAccessPaths) {
+        int slotId = slot.getExprId().asInt();
+        Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths = 
targetAccessPaths.get(slotId);
+        if (targetPaths.isEmpty()) {
+            return;
+        }
+        Collection<Pair<ColumnAccessPathType, List<String>>> coveringPaths =
+                coveringAccessPaths.get(slotId);
+
+        List<List<String>> dataPaths = collectPaths(coveringPaths, 
targetPaths, false);
+        stripMetaByDeeperDataPaths(slot, targetAccessPaths, dataPaths, 
metaSuffix);
+
+        List<List<String>> metaPaths = collectPaths(coveringPaths, 
targetPaths, true);
+        stripMetaByDeeperMetaPaths(slot.getDataType(), slotId, metaSuffix,
+                metaPaths, targetAccessPaths);
+    }
+
+    /**
+     * Level 2 — for each target path ending with {@code metaSuffix}, remove it
+     * when a covering meta path has a deeper prefix after both paths have 
their
+     * meta suffixes stripped. Same-prefix cross-type is handled by
+     * {@link #stripNullBySamePrefixOffset} instead.
+     */
+    private static void stripMetaByDeeperMetaPaths(
+            DataType slotType, int slotId, String metaSuffix,
+            List<List<String>> coveringPaths,
+            Multimap<Integer, Pair<ColumnAccessPathType, List<String>>> 
targetAccessPaths) {
+        Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths =
+                targetAccessPaths.get(slotId);
+        if (targetPaths.isEmpty() || coveringPaths.isEmpty()) {
+            return;
+        }
+
+        List<Pair<ColumnAccessPathType, List<String>>> toRemove = new 
ArrayList<>();
+        List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new 
ArrayList<>();
+        for (Pair<ColumnAccessPathType, List<String>> p : targetPaths) {
+            List<String> targetPath = p.second;
+            if (targetPath.isEmpty() || 
!metaSuffix.equals(targetPath.get(targetPath.size() - 1))) {
+                continue;
+            }
+            List<String> targetPrefix = targetPath.subList(0, 
targetPath.size() - 1);
+            for (List<String> coveringPath : coveringPaths) {
+                if (coveringPath == targetPath || coveringPath.isEmpty()) {
+                    continue;
+                }
+                // Strip meta suffix from both and compare prefix depth: only
+                // strictly deeper covering paths subsume the target. 
Same-prefix
+                // cross-type is handled by Level-1 priority (OFFSET > NULL).
+                if (coveringPath.size() - 1 <= targetPath.size() - 1) {
+                    continue;
+                }
+                OffsetPathRewrite rewrite = compareMetaPathPrefixCoverage(
+                        slotType, targetPrefix, coveringPath);
+                if (rewrite.shouldRemoveOffsetPath()) {
+                    toRemove.add(p);
+                    for (List<String> supplementalPath : 
rewrite.getSupplementalPaths()) {
+                        pathsToAdd.add(Pair.of(p.first, supplementalPath));
+                    }
+                    break;
+                }
+            }
+        }
+        for (Pair<ColumnAccessPathType, List<String>> r : toRemove) {
+            targetAccessPaths.remove(slotId, r);
+        }
+        targetAccessPaths.putAll(slotId, pathsToAdd);
+    }
+
+    /**
+     * Strip target paths ending with {@code metaSuffix} (OFFSET or NULL) when 
a
+     * data path deeper than or equal to the target prefix already covers the
+     * same container. Handles map key supplementation when the covering path
+     * accesses only VALUES while the target prefix uses {@code *} (≡ VALUES).
+     */
+    private static void stripMetaByDeeperDataPaths(
+            Slot slot, Multimap<Integer, Pair<ColumnAccessPathType, 
List<String>>> targetAccessPaths,
+            List<List<String>> dataPaths, String metaSuffix) {
+        int slotId = slot.getExprId().asInt();
+        Collection<Pair<ColumnAccessPathType, List<String>>> targetPaths = 
targetAccessPaths.get(slotId);
+        if (targetPaths.isEmpty() || dataPaths.isEmpty()) {
+            return;
+        }
+
+        List<Pair<ColumnAccessPathType, List<String>>> pathsToRemove = new 
ArrayList<>();
+        List<Pair<ColumnAccessPathType, List<String>>> pathsToAdd = new 
ArrayList<>();
+        for (Pair<ColumnAccessPathType, List<String>> p : new 
ArrayList<>(targetPaths)) {
+            List<String> path = p.second;
+            if (path.isEmpty() || !metaSuffix.equals(path.get(path.size() - 
1))) {
+                continue;
+            }
+            List<String> prefix = path.subList(0, path.size() - 1);
+            OffsetPathRewrite rewrite = analyzePrefixCoverage(
+                    slot.getDataType(), prefix, dataPaths);
+            if (!rewrite.shouldRemoveOffsetPath()) {
+                continue;
+            }
+            pathsToRemove.add(p);
+            for (List<String> supplementalPath : 
rewrite.getSupplementalPaths()) {
+                pathsToAdd.add(Pair.of(p.first, supplementalPath));
+            }
+        }
+        targetPaths.removeAll(pathsToRemove);
+        targetPaths.addAll(pathsToAdd);

Review Comment:
   This pass can add a supplemental full key path after  has already run, so an 
existing key-side meta path can survive next to the new full key 
path.\n\nReduced plan:\n\n\n\nThe collector records , , and .  removes  because 
 covers the value side, then adds supplemental  here. Since the exact-prefix 
cleanup already ran, the final paths can contain both  and . BE  forwards those 
as  and ;  strips , sees , enters , and does not read key data, so  can be 
evaluated with default-filled keys.\n\nPlease re-run exact meta cleanup after 
supplemental paths are added, or fold the supplemental paths into the covering 
set before finishing the same pass. A regression/unit case should assert that  
and  are stripped when  is synthesized.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to