jt2594838 commented on code in PR #16789:
URL: https://github.com/apache/iotdb/pull/16789#discussion_r2601003195
##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java:
##########
@@ -395,6 +437,37 @@ private static List<TreePattern> pruneInclusionPatterns(
return prunedInclusion;
}
+ /**
+ * Prunes patterns from the exclusion list that do NOT overlap with any of
the remaining inclusion
+ * patterns.
+ */
+ private static List<TreePattern> pruneIrrelevantExclusions(
+ final List<TreePattern> inclusion, final List<TreePattern> exclusion) {
+ if (exclusion == null || exclusion.isEmpty()) {
+ return new ArrayList<>();
+ }
+ if (inclusion == null || inclusion.isEmpty()) {
+ // If inclusion is empty, exclusion is irrelevant anyway, but usually
this case
+ // throws exception earlier.
+ return new ArrayList<>();
+ }
+
+ final List<TreePattern> relevantExclusion = new ArrayList<>();
+ for (final TreePattern exc : exclusion) {
+ boolean overlapsWithAnyInclusion = false;
+ for (final TreePattern inc : inclusion) {
+ if (overlaps(exc, inc)) {
+ overlapsWithAnyInclusion = true;
+ break;
+ }
+ }
+ if (overlapsWithAnyInclusion) {
+ relevantExclusion.add(exc);
+ }
+ }
Review Comment:
May use PatternTrie here?
##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java:
##########
@@ -649,4 +758,86 @@ public static int[] checkAndLogPatternCoverage(
return new int[] {coveredCount, inclusionPaths.size()};
}
+
+ /** A specialized Trie to efficiently check path coverage. */
+ private static class PatternTrie {
+ private final TrieNode root = new TrieNode();
+
+ private static class TrieNode {
+ // Children nodes mapped by path segment
+ Map<String, TrieNode> children = new HashMap<>();
+ // Marks if a pattern ends here (e.g., "root.sg" is a set path)
+ boolean isLeaf = false;
+ // Special flags for optimization
+ boolean isMultiLevelWildcard = false; // Ends with **
+ }
+
+ /** Adds a path to the Trie. */
+ public void add(final PartialPath path) {
+ TrieNode node = root;
+ final String[] nodes = path.getNodes();
+
+ for (final String segment : nodes) {
+ // If we are at a node that is already a MultiLevelWildcard (**),
+ // everything below is already covered. We can stop adding.
+ if (node.isMultiLevelWildcard) {
+ return;
+ }
+
+ node = node.children.computeIfAbsent(segment, k -> new TrieNode());
+
+ // If this segment is **, mark it.
+ // Note: In IoTDB PartialPath, ** is usually the last node or specific
wildcard.
+ if (segment.equals(IoTDBConstant.MULTI_LEVEL_PATH_WILDCARD)) {
+ node.isMultiLevelWildcard = true;
+ // Optimization: clear children as ** covers everything
+ node.children.clear();
Review Comment:
May set to Collections.emptyMap?
##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java:
##########
@@ -329,40 +357,54 @@ private static List<TreePattern> optimizePatterns(final
List<TreePattern> patter
return patterns;
}
+ // 1. Sort patterns by "Broadness"
+ // Heuristic: Shorter paths and paths with wildcards should come first.
+ // This allows us to insert 'root.**' first, so we can quickly skip
'root.sg.d1' later.
+ final List<TreePattern> sortedPatterns = new ArrayList<>(patterns);
+ sortedPatterns.sort(
+ (o1, o2) -> {
+ // We can only approximate comparison here since TreePattern
represents multiple paths.
+ // We use the first inclusion path as a representative.
+ final PartialPath p1 = o1.getBaseInclusionPaths().get(0);
+ final PartialPath p2 = o2.getBaseInclusionPaths().get(0);
+
+ // 1. Length: Shorter is generally broader (e.g., root.** vs
root.sg.d1)
+ final int lenCompare = Integer.compare(p1.getNodeLength(),
p2.getNodeLength());
+ if (lenCompare != 0) return lenCompare;
Review Comment:
Mind the code style.
For `if` with only one line, we still use `{}`.
##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java:
##########
@@ -649,4 +758,86 @@ public static int[] checkAndLogPatternCoverage(
return new int[] {coveredCount, inclusionPaths.size()};
}
+
+ /** A specialized Trie to efficiently check path coverage. */
+ private static class PatternTrie {
+ private final TrieNode root = new TrieNode();
+
+ private static class TrieNode {
+ // Children nodes mapped by path segment
+ Map<String, TrieNode> children = new HashMap<>();
+ // Marks if a pattern ends here (e.g., "root.sg" is a set path)
+ boolean isLeaf = false;
+ // Special flags for optimization
+ boolean isMultiLevelWildcard = false; // Ends with **
+ }
+
+ /** Adds a path to the Trie. */
+ public void add(final PartialPath path) {
+ TrieNode node = root;
+ final String[] nodes = path.getNodes();
+
+ for (final String segment : nodes) {
+ // If we are at a node that is already a MultiLevelWildcard (**),
+ // everything below is already covered. We can stop adding.
+ if (node.isMultiLevelWildcard) {
+ return;
+ }
+
+ node = node.children.computeIfAbsent(segment, k -> new TrieNode());
+
+ // If this segment is **, mark it.
+ // Note: In IoTDB PartialPath, ** is usually the last node or specific
wildcard.
+ if (segment.equals(IoTDBConstant.MULTI_LEVEL_PATH_WILDCARD)) {
+ node.isMultiLevelWildcard = true;
+ // Optimization: clear children as ** covers everything
+ node.children.clear();
+ node.isLeaf = true;
+ return;
+ }
+ }
+ node.isLeaf = true;
+ }
+
+ /**
+ * Checks if the given path is covered by any existing pattern in the
Trie. e.g., if Trie has
+ * "root.sg.**", then "root.sg.d1.s1" returns true.
+ */
+ public boolean isCovered(final PartialPath path) {
+ return checkCoverage(root, path.getNodes(), 0);
+ }
+
+ private boolean checkCoverage(final TrieNode node, final String[]
pathNodes, final int index) {
+ // 1. If the Trie node is a Multi-Level Wildcard (**), it covers
everything remainder
+ if (node.isMultiLevelWildcard) {
+ return true;
+ }
+
+ // 2. If we reached the end of the query path
+ if (index >= pathNodes.length) {
+ // The path is covered if the Trie also ends here (isLeaf)
+ // Example: Trie="root.sg", Path="root.sg" -> Covered
+ // Example: Trie="root.sg.d1", Path="root.sg" -> Not Covered (Trie is
more specific)
+ return node.isLeaf;
+ }
+
+ final String currentSegment = pathNodes[index];
+
+ // 3. Direct Match or Single Level Wildcard (*) in Trie
+ // Check exact match child
+ final TrieNode child = node.children.get(currentSegment);
+ if (child != null && checkCoverage(child, pathNodes, index + 1)) {
+ return true;
+ }
+
+ // Check if Trie has a '*' child (One Level Wildcard)
+ // '*' in Trie covers any single level in Path
+ final TrieNode wildcardChild =
node.children.get(IoTDBConstant.ONE_LEVEL_PATH_WILDCARD);
+ if (wildcardChild != null) {
+ return checkCoverage(wildcardChild, pathNodes, index + 1);
+ }
Review Comment:
How about storing the wildcard child separately (as another nuallable field
of TrieNode) to reduce map access.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]