This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new c4c8bb76b2f Pipe: support pattern pruning and redundancy removal in
TreePattern parsing (#16789)
c4c8bb76b2f is described below
commit c4c8bb76b2f763e87ac3e437af9097e0d7a5033e
Author: VGalaxies <[email protected]>
AuthorDate: Thu Dec 25 12:08:40 2025 +0800
Pipe: support pattern pruning and redundancy removal in TreePattern parsing
(#16789)
* setup
* reset
* add UT
* apply review
* use trie for optimizePatterns
* apply review
* remove useless
* improve pruneInclusionPatterns & pruneIrrelevantExclusions
---
.../db/pipe/pattern/TreePatternPruningTest.java | 156 +++++++
.../pipe/datastructure/pattern/TreePattern.java | 488 +++++++++++++++++----
.../pattern/UnionIoTDBTreePattern.java | 6 +
.../pattern/WithExclusionIoTDBTreePattern.java | 2 -
.../pattern/WithExclusionTreePattern.java | 2 -
5 files changed, 571 insertions(+), 83 deletions(-)
diff --git
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java
new file mode 100644
index 00000000000..23b6c334b5c
--- /dev/null
+++
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/pipe/pattern/TreePatternPruningTest.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.db.pipe.pattern;
+
+import org.apache.iotdb.commons.pipe.config.constant.PipeSourceConstant;
+import org.apache.iotdb.commons.pipe.datastructure.pattern.IoTDBTreePattern;
+import org.apache.iotdb.commons.pipe.datastructure.pattern.PrefixTreePattern;
+import org.apache.iotdb.commons.pipe.datastructure.pattern.TreePattern;
+import
org.apache.iotdb.commons.pipe.datastructure.pattern.UnionIoTDBTreePattern;
+import org.apache.iotdb.pipe.api.customizer.parameter.PipeParameters;
+import org.apache.iotdb.pipe.api.exception.PipeException;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+
+public class TreePatternPruningTest {
+
+ @Test
+ public void testUnionInternalPruning_Cover() {
+ final PipeParameters params =
+ new PipeParameters(
+ new HashMap<String, String>() {
+ {
+ put(PipeSourceConstant.SOURCE_PATH_KEY,
"root.db.d1.*,root.db.d1.s1");
+ }
+ });
+
+ final TreePattern result =
TreePattern.parsePipePatternFromSourceParameters(params);
+
+ Assert.assertTrue("Should be IoTDBTreePattern", result instanceof
IoTDBTreePattern);
+ Assert.assertEquals("root.db.d1.*", result.getPattern());
+ }
+
+ @Test
+ public void testUnionInternalPruning_Duplicates() {
+ final PipeParameters params =
+ new PipeParameters(
+ new HashMap<String, String>() {
+ {
+ put(PipeSourceConstant.SOURCE_PATH_KEY,
"root.db.d1,root.db.d1");
+ }
+ });
+
+ final TreePattern result =
TreePattern.parsePipePatternFromSourceParameters(params);
+
+ Assert.assertTrue(result instanceof IoTDBTreePattern);
+ Assert.assertEquals("root.db.d1", result.getPattern());
+ }
+
+ @Test
+ public void testInclusionPrunedByExclusion_Partial() {
+ final PipeParameters params =
+ new PipeParameters(
+ new HashMap<String, String>() {
+ {
+ put(PipeSourceConstant.SOURCE_PATH_KEY,
"root.sg.d1,root.sg.d2");
+ put(PipeSourceConstant.SOURCE_PATH_EXCLUSION_KEY,
"root.sg.d1");
+ }
+ });
+
+ final TreePattern result =
TreePattern.parsePipePatternFromSourceParameters(params);
+
+ Assert.assertTrue(result instanceof IoTDBTreePattern);
+ Assert.assertEquals("root.sg.d2", result.getPattern());
+ }
+
+ @Test
+ public void testInclusionPrunedByExclusion_FullCoverage_Exception() {
+ final PipeParameters params =
+ new PipeParameters(
+ new HashMap<String, String>() {
+ {
+ put(PipeSourceConstant.SOURCE_PATH_KEY, "root.sg.d1");
+ put(PipeSourceConstant.SOURCE_PATH_EXCLUSION_KEY,
"root.sg.**");
+ }
+ });
+
+ try {
+ TreePattern.parsePipePatternFromSourceParameters(params);
+ Assert.fail("Should throw PipeException because Exclusion fully covers
Inclusion");
+ } catch (final PipeException ignored) {
+ // Expected exception
+ }
+ }
+
+ @Test
+ public void testComplexPruning() {
+ final PipeParameters params =
+ new PipeParameters(
+ new HashMap<String, String>() {
+ {
+ put(PipeSourceConstant.SOURCE_PATH_KEY,
"root.sg.A,root.sg.B,root.sg.A.sub");
+ put(PipeSourceConstant.SOURCE_PATH_EXCLUSION_KEY,
"root.sg.A,root.sg.A.**");
+ }
+ });
+
+ final TreePattern result =
TreePattern.parsePipePatternFromSourceParameters(params);
+
+ Assert.assertTrue(result instanceof IoTDBTreePattern);
+ Assert.assertEquals("root.sg.B", result.getPattern());
+ }
+
+ @Test
+ public void testComplexPruning_Prefix() {
+ final PipeParameters params =
+ new PipeParameters(
+ new HashMap<String, String>() {
+ {
+ put(PipeSourceConstant.SOURCE_PATTERN_KEY,
"root.sg.A,root.sg.B,root.sg.A.sub");
+ put(PipeSourceConstant.SOURCE_PATTERN_EXCLUSION_KEY,
"root.sg.A");
+ put(PipeSourceConstant.SOURCE_PATTERN_FORMAT_KEY, "prefix");
+ }
+ });
+
+ final TreePattern result =
TreePattern.parsePipePatternFromSourceParameters(params);
+
+ Assert.assertTrue(result instanceof PrefixTreePattern);
+ Assert.assertEquals("root.sg.B", result.getPattern());
+ }
+
+ @Test
+ public void testUnionPreservedWhenNotCovered() {
+ final PipeParameters params =
+ new PipeParameters(
+ new HashMap<String, String>() {
+ {
+ put(PipeSourceConstant.SOURCE_PATH_KEY,
"root.sg.d1,root.sg.d2");
+ put(PipeSourceConstant.SOURCE_PATH_EXCLUSION_KEY,
"root.other");
+ }
+ });
+
+ final TreePattern result =
TreePattern.parsePipePatternFromSourceParameters(params);
+
+ Assert.assertTrue(result instanceof UnionIoTDBTreePattern);
+ Assert.assertEquals("root.sg.d1,root.sg.d2", result.getPattern());
+ }
+}
diff --git
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java
index 81a93e0c5e2..c35ba5c81fc 100644
---
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java
+++
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java
@@ -19,9 +19,11 @@
package org.apache.iotdb.commons.pipe.datastructure.pattern;
+import org.apache.iotdb.commons.conf.IoTDBConstant;
import org.apache.iotdb.commons.path.PartialPath;
import org.apache.iotdb.commons.pipe.config.constant.PipeSourceConstant;
import org.apache.iotdb.commons.pipe.config.constant.SystemConstant;
+import org.apache.iotdb.commons.utils.TestOnly;
import org.apache.iotdb.pipe.api.customizer.parameter.PipeParameters;
import org.apache.iotdb.pipe.api.exception.PipeException;
@@ -32,7 +34,9 @@ import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;
@@ -140,60 +144,83 @@ public abstract class TreePattern {
final boolean isTreeModelDataAllowedToBeCaptured =
isTreeModelDataAllowToBeCaptured(sourceParameters);
- // 1. Define the default inclusion pattern (matches all, "root.**")
- // This is used if no inclusion patterns are specified.
- final TreePattern defaultInclusionPattern =
- buildUnionPattern(
- isTreeModelDataAllowedToBeCaptured,
- Collections.singletonList(
- new IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured,
null)));
-
- // 2. Parse INCLUSION patterns using the helper
- final TreePattern inclusionPattern =
- parsePatternUnion(
+ // 1. Parse INCLUSION patterns into a list
+ List<TreePattern> inclusionPatterns =
+ parsePatternList(
sourceParameters,
isTreeModelDataAllowedToBeCaptured,
- // 'path' keys (IoTDB wildcard)
EXTRACTOR_PATH_KEY,
SOURCE_PATH_KEY,
- // 'pattern' keys (Prefix or IoTDB via format)
EXTRACTOR_PATTERN_KEY,
- SOURCE_PATTERN_KEY,
- // Default pattern if no keys are found
- defaultInclusionPattern);
+ SOURCE_PATTERN_KEY);
+
+ // If no inclusion patterns are specified, use default "root.**"
+ if (inclusionPatterns.isEmpty()) {
+ inclusionPatterns =
+ new ArrayList<>(
+ Collections.singletonList(
+ new IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured,
null)));
+ }
- // 3. Parse EXCLUSION patterns using the helper
- final TreePattern exclusionPattern =
- parsePatternUnion(
+ // 2. Parse EXCLUSION patterns into a list
+ List<TreePattern> exclusionPatterns =
+ parsePatternList(
sourceParameters,
isTreeModelDataAllowedToBeCaptured,
- // 'path.exclusion' keys (IoTDB wildcard)
EXTRACTOR_PATH_EXCLUSION_KEY,
SOURCE_PATH_EXCLUSION_KEY,
- // 'pattern.exclusion' keys (Prefix)
EXTRACTOR_PATTERN_EXCLUSION_KEY,
- SOURCE_PATTERN_EXCLUSION_KEY,
- // Default for exclusion is "match nothing" (null)
- null);
-
- // 4. Combine inclusion and exclusion
- if (exclusionPattern == null) {
- // No exclusion defined, return the inclusion pattern directly
- return inclusionPattern;
- } else {
- // If both inclusion and exclusion patterns support IoTDB operations,
- // use the specialized ExclusionIoTDBTreePattern
- if (inclusionPattern instanceof IoTDBTreePatternOperations
- && exclusionPattern instanceof IoTDBTreePatternOperations) {
- return new WithExclusionIoTDBTreePattern(
- isTreeModelDataAllowedToBeCaptured,
- (IoTDBTreePatternOperations) inclusionPattern,
- (IoTDBTreePatternOperations) exclusionPattern);
- }
- // Both are defined, wrap them in an ExclusionTreePattern
- return new WithExclusionTreePattern(
- isTreeModelDataAllowedToBeCaptured, inclusionPattern,
exclusionPattern);
+ SOURCE_PATTERN_EXCLUSION_KEY);
+
+ // 3. Optimize the lists: remove redundant patterns (e.g., if "root.**"
exists, "root.db" is
+ // redundant)
+ inclusionPatterns = optimizePatterns(inclusionPatterns);
+ exclusionPatterns = optimizePatterns(exclusionPatterns);
+
+ // 4. Prune inclusion patterns: if an inclusion pattern is fully covered
by an exclusion
+ // pattern, remove it
+ inclusionPatterns = pruneInclusionPatterns(inclusionPatterns,
exclusionPatterns);
+
+ // 5. Check if the resulting inclusion pattern is empty
+ if (inclusionPatterns.isEmpty()) {
+ final String msg =
+ String.format(
+ "Pipe: The provided exclusion pattern fully covers the inclusion
pattern. "
+ + "This pipe pattern will match nothing. "
+ + "Inclusion: %s, Exclusion: %s",
+ sourceParameters.getStringByKeys(EXTRACTOR_PATTERN_KEY,
SOURCE_PATTERN_KEY),
+ sourceParameters.getStringByKeys(
+ EXTRACTOR_PATTERN_EXCLUSION_KEY,
SOURCE_PATTERN_EXCLUSION_KEY));
+ LOGGER.warn(msg);
+ throw new PipeException(msg);
+ }
+
+ // 6. Prune exclusion patterns: if an exclusion pattern does not overlap
with
+ // ANY of the remaining inclusion patterns, it is useless and should be
removed.
+ exclusionPatterns = pruneIrrelevantExclusions(inclusionPatterns,
exclusionPatterns);
+
+ // 7. Build final patterns
+ final TreePattern finalInclusionPattern =
+ buildUnionPattern(isTreeModelDataAllowedToBeCaptured,
inclusionPatterns);
+
+ if (exclusionPatterns.isEmpty()) {
+ return finalInclusionPattern;
}
+
+ final TreePattern finalExclusionPattern =
+ buildUnionPattern(isTreeModelDataAllowedToBeCaptured,
exclusionPatterns);
+
+ // 8. Combine inclusion and exclusion
+ if (finalInclusionPattern instanceof IoTDBTreePatternOperations
+ && finalExclusionPattern instanceof IoTDBTreePatternOperations) {
+ return new WithExclusionIoTDBTreePattern(
+ isTreeModelDataAllowedToBeCaptured,
+ (IoTDBTreePatternOperations) finalInclusionPattern,
+ (IoTDBTreePatternOperations) finalExclusionPattern);
+ }
+
+ return new WithExclusionTreePattern(
+ isTreeModelDataAllowedToBeCaptured, finalInclusionPattern,
finalExclusionPattern);
}
/**
@@ -274,65 +301,227 @@ public abstract class TreePattern {
}
/**
- * A private helper method to parse a set of 'path' and 'pattern' keys into
a single union
- * TreePattern. This contains the original logic of
parsePipePatternFromSourceParameters.
- *
- * @param sourceParameters The source parameters.
- * @param isTreeModelDataAllowedToBeCaptured Flag for TreePattern
constructor.
- * @param extractorPathKey Key for extractor path (e.g., "extractor.path").
- * @param sourcePathKey Key for source path (e.g., "source.path").
- * @param extractorPatternKey Key for extractor pattern (e.g.,
"extractor.pattern").
- * @param sourcePatternKey Key for source pattern (e.g., "source.pattern").
- * @param defaultPattern The pattern to return if both path and pattern are
null. If this
- * parameter is null, this method returns null.
- * @return The parsed TreePattern, or defaultPattern, or null if
defaultPattern is null and no
- * patterns are specified.
+ * Helper method to parse pattern parameters into a list of patterns without
creating the Union
+ * object immediately.
*/
- private static TreePattern parsePatternUnion(
+ private static List<TreePattern> parsePatternList(
final PipeParameters sourceParameters,
final boolean isTreeModelDataAllowedToBeCaptured,
final String extractorPathKey,
final String sourcePathKey,
final String extractorPatternKey,
- final String sourcePatternKey,
- final TreePattern defaultPattern) {
+ final String sourcePatternKey) {
final String path = sourceParameters.getStringByKeys(extractorPathKey,
sourcePathKey);
final String pattern =
sourceParameters.getStringByKeys(extractorPatternKey, sourcePatternKey);
- // 1. If both "source.path" and "source.pattern" are specified, their
union will be used.
- if (path != null && pattern != null) {
- final List<TreePattern> result = new ArrayList<>();
- // Parse "source.path" as IoTDB-style path.
+ final List<TreePattern> result = new ArrayList<>();
+
+ if (path != null) {
result.addAll(
parseMultiplePatterns(
path, p -> new
IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, p)));
- // Parse "source.pattern" using the helper method.
+ }
+
+ if (pattern != null) {
result.addAll(
parsePatternsFromPatternParameter(
pattern, sourceParameters, isTreeModelDataAllowedToBeCaptured));
- return buildUnionPattern(isTreeModelDataAllowedToBeCaptured, result);
}
- // 2. If only "source.path" is specified, it will be interpreted as an
IoTDB-style path.
- if (path != null) {
- return buildUnionPattern(
- isTreeModelDataAllowedToBeCaptured,
- parseMultiplePatterns(
- path, p -> new
IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, p)));
+ return result;
+ }
+
+ /**
+ * Removes patterns from the list that are covered by other patterns in the
same list. For
+ * example, if "root.**" and "root.db.**" are present, "root.db.**" is
removed.
+ *
+ * <p><b>Optimization Strategy:</b>
+ *
+ * <ol>
+ * <li><b>Sort First:</b> Patterns are sorted by "broadness" (shortest
length and most wildcards
+ * first). This ensures that dominating patterns (like {@code
root.**}) are processed first.
+ * <li><b>Filter with Trie:</b> Instead of comparing every pattern against
every other pattern
+ * (O(N^2)), we build a Trie to check for coverage. For each pattern,
we check if it is
+ * already "covered" by the Trie. If it is, we discard it; if not, we
add it to the Trie.
+ * </ol>
+ *
+ * <p><b>Time Complexity:</b> O(N * L), where N is the number of patterns
and L is the average
+ * path length.
+ */
+ private static List<TreePattern> optimizePatterns(final List<TreePattern>
patterns) {
+ if (patterns == null || patterns.isEmpty()) {
+ return new ArrayList<>();
+ }
+ if (patterns.size() == 1) {
+ return patterns;
}
- // 3. If only "source.pattern" is specified, parse it using the helper
method.
- if (pattern != null) {
- return buildUnionPattern(
- isTreeModelDataAllowedToBeCaptured,
- parsePatternsFromPatternParameter(
- pattern, sourceParameters, isTreeModelDataAllowedToBeCaptured));
+ // 1. Sort patterns by "Broadness"
+ // Heuristic: Shorter paths and paths with wildcards should come first.
+ // This allows us to insert 'root.**' first, so we can quickly skip
'root.sg.d1' later.
+ final List<TreePattern> sortedPatterns = new ArrayList<>(patterns);
+ sortedPatterns.sort(
+ (o1, o2) -> {
+ // We can only approximate comparison here since TreePattern
represents multiple paths.
+ // We use the first inclusion path as a representative.
+ final PartialPath p1 = o1.getBaseInclusionPaths().get(0);
+ final PartialPath p2 = o2.getBaseInclusionPaths().get(0);
+
+ // 1. Length: Shorter is generally broader (e.g., root.** vs
root.sg.d1)
+ final int lenCompare = Integer.compare(p1.getNodeLength(),
p2.getNodeLength());
+ if (lenCompare != 0) {
+ return lenCompare;
+ }
+
+ // 2. Wildcards: Pattern with wildcards is broader (e.g., root.sg.*
vs root.sg.d1)
+ final boolean w1 = p1.hasWildcard();
+ final boolean w2 = p2.hasWildcard();
+ if (w1 && !w2) {
+ return -1;
+ }
+ if (!w1 && w2) {
+ return 1;
+ }
+
+ // 3. Deterministic tie-breaker
+ return p1.compareTo(p2);
+ });
+
+ // 2. Filter using Trie
+ final PatternTrie trie = new PatternTrie();
+ final List<TreePattern> optimized = new ArrayList<>();
+
+ for (final TreePattern pattern : sortedPatterns) {
+ boolean isCovered = true;
+ // A pattern is redundant only if ALL its base paths are covered by the
Trie
+ for (final PartialPath path : pattern.getBaseInclusionPaths()) {
+ if (!trie.isCovered(path)) {
+ isCovered = false;
+ break;
+ }
+ }
+
+ if (!isCovered) {
+ optimized.add(pattern);
+ // Add all its paths to the Trie to cover future patterns
+ for (final PartialPath path : pattern.getBaseInclusionPaths()) {
+ trie.add(path);
+ }
+ }
}
- // 4. If neither "source.path" nor "source.pattern" is specified,
- // return the provided default pattern (which may be null).
- return defaultPattern;
+ return optimized;
+ }
+
+ /**
+ * Prunes patterns from the inclusion list that are fully covered by ANY
pattern in the exclusion
+ * list.
+ *
+ * <p><b>Optimization Strategy:</b>
+ *
+ * <ol>
+ * <li><b>Build Exclusion Trie:</b> Construct a Trie containing all paths
from the exclusion
+ * list. This aggregates the coverage of all exclusion patterns into a
single structure.
+ * <li><b>Check Coverage:</b> Iterate through the inclusion list. For each
inclusion pattern,
+ * check if ALL of its represented paths are covered by the Exclusion
Trie. If so, the
+ * pattern is redundant and removed.
+ * </ol>
+ *
+ * <p><b>Time Complexity:</b> O((N + M) * L), where N is the number of
inclusion patterns, M is
+ * the number of exclusion patterns, and L is the average path length.
+ */
+ private static List<TreePattern> pruneInclusionPatterns(
+ final List<TreePattern> inclusion, final List<TreePattern> exclusion) {
+ if (inclusion == null || inclusion.isEmpty()) {
+ return new ArrayList<>();
+ }
+ if (exclusion == null || exclusion.isEmpty()) {
+ return inclusion;
+ }
+
+ // 1. Build Trie with all exclusion paths
+ // The Trie represents the union of all excluded areas.
+ final PatternTrie exclusionTrie = new PatternTrie();
+ for (final TreePattern exc : exclusion) {
+ for (final PartialPath path : exc.getBaseInclusionPaths()) {
+ exclusionTrie.add(path);
+ }
+ }
+
+ final List<TreePattern> prunedInclusion = new ArrayList<>();
+ // 2. Filter inclusion patterns
+ for (final TreePattern inc : inclusion) {
+ boolean isFullyExcluded = true;
+ // An inclusion pattern is fully excluded only if ALL its constituent
base paths
+ // are covered by the exclusion Trie.
+ for (final PartialPath path : inc.getBaseInclusionPaths()) {
+ if (!exclusionTrie.isCovered(path)) {
+ isFullyExcluded = false;
+ break;
+ }
+ }
+
+ // If not fully excluded (i.e., at least one path survives), keep it.
+ if (!isFullyExcluded) {
+ prunedInclusion.add(inc);
+ }
+ }
+ return prunedInclusion;
+ }
+
+ /**
+ * Prunes patterns from the exclusion list that do NOT overlap with any of
the remaining inclusion
+ * patterns.
+ *
+ * <p><b>Optimization Strategy:</b>
+ *
+ * <ol>
+ * <li><b>Build Inclusion Trie:</b> Construct a Trie containing all paths
from the inclusion
+ * list. This aggregates the search space of inclusion patterns.
+ * <li><b>Filter Exclusions:</b> Iterate through the exclusion list. For
each exclusion pattern,
+ * check if it overlaps with the Inclusion Trie. Only exclusions that
overlap with at least
+ * one inclusion pattern are kept.
+ * </ol>
+ *
+ * <p><b>Time Complexity:</b> O((N + M) * L), where N is the number of
inclusion patterns, M is
+ * the number of exclusion patterns, and L is the average path length.
+ */
+ private static List<TreePattern> pruneIrrelevantExclusions(
+ final List<TreePattern> inclusion, final List<TreePattern> exclusion) {
+ if (exclusion == null || exclusion.isEmpty()) {
+ return new ArrayList<>();
+ }
+ if (inclusion == null || inclusion.isEmpty()) {
+ // If inclusion is empty, exclusion is irrelevant anyway.
+ return new ArrayList<>();
+ }
+
+ // 1. Build Trie from Inclusion Patterns
+ final PatternTrie inclusionTrie = new PatternTrie();
+ for (final TreePattern inc : inclusion) {
+ for (final PartialPath path : inc.getBaseInclusionPaths()) {
+ inclusionTrie.add(path);
+ }
+ }
+
+ // 2. Filter Exclusion Patterns using the Trie
+ final List<TreePattern> relevantExclusion = new ArrayList<>();
+ for (final TreePattern exc : exclusion) {
+ boolean overlapsWithAnyInclusion = false;
+ // An exclusion pattern is relevant if ANY of its base paths overlap
with the inclusion Trie
+ for (final PartialPath path : exc.getBaseInclusionPaths()) {
+ if (inclusionTrie.overlaps(path)) {
+ overlapsWithAnyInclusion = true;
+ break;
+ }
+ }
+
+ if (overlapsWithAnyInclusion) {
+ relevantExclusion.add(exc);
+ }
+ }
+ return relevantExclusion;
}
/**
@@ -412,6 +601,10 @@ public abstract class TreePattern {
*/
public static TreePattern buildUnionPattern(
final boolean isTreeModelDataAllowedToBeCaptured, final
List<TreePattern> patterns) {
+ if (patterns.size() == 1) {
+ return patterns.get(0);
+ }
+
// Check if all instances in the list are of type IoTDBTreePattern
boolean allIoTDB = true;
for (final TreePattern p : patterns) {
@@ -490,6 +683,7 @@ public abstract class TreePattern {
* @return An int array `[coveredCount, totalInclusionPaths]` for testing
non-failing scenarios.
* @throws PipeException If the inclusion pattern is fully covered by the
exclusion pattern.
*/
+ @TestOnly
public static int[] checkAndLogPatternCoverage(
final TreePattern inclusion, final TreePattern exclusion) throws
PipeException {
if (inclusion == null || exclusion == null) {
@@ -553,4 +747,140 @@ public abstract class TreePattern {
return new int[] {coveredCount, inclusionPaths.size()};
}
+
+ /** A specialized Trie to efficiently check path coverage. */
+ private static class PatternTrie {
+ private final TrieNode root = new TrieNode();
+
+ private static class TrieNode {
+ // Children nodes mapped by specific path segments (excluding *)
+ Map<String, TrieNode> children = new HashMap<>();
+ // Optimized field for One Level Wildcard (*) child to reduce map lookups
+ TrieNode wildcardNode = null;
+
+ // Marks if a pattern ends here (e.g., "root.sg" is a set path)
+ boolean isLeaf = false;
+ // Special flags for optimization
+ boolean isMultiLevelWildcard = false; // Ends with **
+ }
+
+ /** Adds a path to the Trie. */
+ public void add(final PartialPath path) {
+ TrieNode node = root;
+ final String[] nodes = path.getNodes();
+
+ for (final String segment : nodes) {
+ // If we are at a node that is already a MultiLevelWildcard (**),
+ // everything below is already covered. We can stop adding.
+ if (node.isMultiLevelWildcard) {
+ return;
+ }
+
+ // Check for Multi-Level Wildcard (**)
+ if (segment.equals(IoTDBConstant.MULTI_LEVEL_PATH_WILDCARD)) {
+ node.isMultiLevelWildcard = true;
+ // Optimization: clear children as ** covers everything
+ node.children = Collections.emptyMap();
+ node.wildcardNode = null;
+ node.isLeaf = true;
+ return;
+ }
+
+ // Check for One-Level Wildcard (*)
+ if (segment.equals(IoTDBConstant.ONE_LEVEL_PATH_WILDCARD)) {
+ if (node.wildcardNode == null) {
+ node.wildcardNode = new TrieNode();
+ }
+ node = node.wildcardNode;
+ } else {
+ // Regular specific node
+ node = node.children.computeIfAbsent(segment, k -> new TrieNode());
+ }
+ }
+ node.isLeaf = true;
+ }
+
+ /** Checks if the given path is covered by any existing pattern in the
Trie. */
+ public boolean isCovered(final PartialPath path) {
+ return checkCoverage(root, path.getNodes(), 0);
+ }
+
+ private boolean checkCoverage(final TrieNode node, final String[]
pathNodes, final int index) {
+ // 1. If the Trie node is a Multi-Level Wildcard (**), it covers
everything remainder
+ if (node.isMultiLevelWildcard) {
+ return true;
+ }
+
+ // 2. If we reached the end of the query path
+ if (index >= pathNodes.length) {
+ // The path is covered if the Trie also ends here (isLeaf)
+ return node.isLeaf;
+ }
+
+ final String currentSegment = pathNodes[index];
+
+ // 3. Direct Match in Trie
+ final TrieNode child = node.children.get(currentSegment);
+ if (child != null && checkCoverage(child, pathNodes, index + 1)) {
+ return true;
+ }
+
+ // 4. Single Level Wildcard (*) in Trie
+ if (node.wildcardNode != null) {
+ return checkCoverage(node.wildcardNode, pathNodes, index + 1);
+ }
+
+ return false;
+ }
+
+ /** Checks if the given path overlaps with any pattern in the Trie. */
+ public boolean overlaps(final PartialPath path) {
+ return checkOverlap(root, path.getNodes(), 0);
+ }
+
+ private boolean checkOverlap(final TrieNode node, final String[]
pathNodes, final int index) {
+ // 1. If Trie has '**', it overlaps everything.
+ if (node.isMultiLevelWildcard) {
+ return true;
+ }
+
+ // 2. If Query Path has '**', it overlaps everything remaining in this
valid branch.
+ if (index < pathNodes.length
+ && pathNodes[index].equals(IoTDBConstant.MULTI_LEVEL_PATH_WILDCARD))
{
+ return true;
+ }
+
+ // 3. End of Query Path: Overlap exists if Trie also ends here.
+ if (index >= pathNodes.length) {
+ return node.isLeaf;
+ }
+
+ final String pNode = pathNodes[index];
+
+ // 4. Case: Query Node is '*' (matches any child in Trie, both specific
and wildcard)
+ if (pNode.equals(IoTDBConstant.ONE_LEVEL_PATH_WILDCARD)) {
+ // Check all specific children
+ for (final TrieNode child : node.children.values()) {
+ if (checkOverlap(child, pathNodes, index + 1)) {
+ return true;
+ }
+ }
+ // Check wildcard child
+ if (node.wildcardNode != null) {
+ return checkOverlap(node.wildcardNode, pathNodes, index + 1);
+ }
+ return false;
+ }
+
+ // 5. Case: Query Node is specific (e.g., "d1")
+ // 5a. Check exact match in Trie
+ final TrieNode exactChild = node.children.get(pNode);
+ if (exactChild != null && checkOverlap(exactChild, pathNodes, index +
1)) {
+ return true;
+ }
+
+ // 5b. Check '*' in Trie (matches specific query node)
+ return node.wildcardNode != null && checkOverlap(node.wildcardNode,
pathNodes, index + 1);
+ }
+ }
}
diff --git
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/UnionIoTDBTreePattern.java
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/UnionIoTDBTreePattern.java
index 1d047ed89b8..379232ad253 100644
---
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/UnionIoTDBTreePattern.java
+++
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/UnionIoTDBTreePattern.java
@@ -21,6 +21,7 @@ package org.apache.iotdb.commons.pipe.datastructure.pattern;
import org.apache.iotdb.commons.path.PartialPath;
import org.apache.iotdb.commons.path.PathPatternTree;
+import org.apache.iotdb.commons.utils.TestOnly;
import org.apache.tsfile.file.metadata.IDeviceID;
@@ -55,6 +56,11 @@ public class UnionIoTDBTreePattern extends
IoTDBTreePatternOperations {
this.patterns = Collections.singletonList(pattern);
}
+ @TestOnly
+ public List<IoTDBTreePattern> getPatterns() {
+ return patterns;
+ }
+
//////////////////////////// Tree Pattern Operations
////////////////////////////
@Override
diff --git
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java
index f2e580e9cdf..67750760339 100644
---
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java
+++
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionIoTDBTreePattern.java
@@ -46,8 +46,6 @@ public class WithExclusionIoTDBTreePattern extends
IoTDBTreePatternOperations {
super(isTreeModelDataAllowedToBeCaptured);
this.inclusionPattern = inclusionPattern;
this.exclusionPattern = exclusionPattern;
-
- TreePattern.checkAndLogPatternCoverage(inclusionPattern, exclusionPattern);
}
public WithExclusionIoTDBTreePattern(
diff --git
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java
index 0cf1d0d1179..eb255ed8392 100644
---
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java
+++
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/WithExclusionTreePattern.java
@@ -44,8 +44,6 @@ public class WithExclusionTreePattern extends TreePattern {
super(isTreeModelDataAllowedToBeCaptured);
this.inclusionPattern = inclusionPattern;
this.exclusionPattern = exclusionPattern;
-
- TreePattern.checkAndLogPatternCoverage(inclusionPattern, exclusionPattern);
}
@Override