VGalaxies commented on code in PR #16789:
URL: https://github.com/apache/iotdb/pull/16789#discussion_r2596178493


##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java:
##########
@@ -274,65 +286,145 @@ public static TreePattern parsePatternFromString(
   }
 
   /**
-   * A private helper method to parse a set of 'path' and 'pattern' keys into 
a single union
-   * TreePattern. This contains the original logic of 
parsePipePatternFromSourceParameters.
-   *
-   * @param sourceParameters The source parameters.
-   * @param isTreeModelDataAllowedToBeCaptured Flag for TreePattern 
constructor.
-   * @param extractorPathKey Key for extractor path (e.g., "extractor.path").
-   * @param sourcePathKey Key for source path (e.g., "source.path").
-   * @param extractorPatternKey Key for extractor pattern (e.g., 
"extractor.pattern").
-   * @param sourcePatternKey Key for source pattern (e.g., "source.pattern").
-   * @param defaultPattern The pattern to return if both path and pattern are 
null. If this
-   *     parameter is null, this method returns null.
-   * @return The parsed TreePattern, or defaultPattern, or null if 
defaultPattern is null and no
-   *     patterns are specified.
+   * Helper method to parse pattern parameters into a list of patterns without 
creating the Union
+   * object immediately.
    */
-  private static TreePattern parsePatternUnion(
+  private static List<TreePattern> parsePatternList(
       final PipeParameters sourceParameters,
       final boolean isTreeModelDataAllowedToBeCaptured,
       final String extractorPathKey,
       final String sourcePathKey,
       final String extractorPatternKey,
-      final String sourcePatternKey,
-      final TreePattern defaultPattern) {
+      final String sourcePatternKey) {
 
     final String path = sourceParameters.getStringByKeys(extractorPathKey, 
sourcePathKey);
     final String pattern = 
sourceParameters.getStringByKeys(extractorPatternKey, sourcePatternKey);
 
-    // 1. If both "source.path" and "source.pattern" are specified, their 
union will be used.
-    if (path != null && pattern != null) {
-      final List<TreePattern> result = new ArrayList<>();
-      // Parse "source.path" as IoTDB-style path.
+    final List<TreePattern> result = new ArrayList<>();
+
+    if (path != null) {
       result.addAll(
           parseMultiplePatterns(
               path, p -> new 
IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, p)));
-      // Parse "source.pattern" using the helper method.
+    }
+
+    if (pattern != null) {
       result.addAll(
           parsePatternsFromPatternParameter(
               pattern, sourceParameters, isTreeModelDataAllowedToBeCaptured));
-      return buildUnionPattern(isTreeModelDataAllowedToBeCaptured, result);
     }
 
-    // 2. If only "source.path" is specified, it will be interpreted as an 
IoTDB-style path.
-    if (path != null) {
-      return buildUnionPattern(
-          isTreeModelDataAllowedToBeCaptured,
-          parseMultiplePatterns(
-              path, p -> new 
IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, p)));
+    return result;
+  }
+
+  /**
+   * Removes patterns from the list that are covered by other patterns in the 
same list. For
+   * example, if "root.**" and "root.db.**" are present, "root.db.**" is 
removed.
+   */
+  private static List<TreePattern> optimizePatterns(final List<TreePattern> 
patterns) {
+    if (patterns == null || patterns.isEmpty()) {
+      return new ArrayList<>();
+    }
+    if (patterns.size() == 1) {
+      return patterns;
     }
 
-    // 3. If only "source.pattern" is specified, parse it using the helper 
method.
-    if (pattern != null) {
-      return buildUnionPattern(
-          isTreeModelDataAllowedToBeCaptured,
-          parsePatternsFromPatternParameter(
-              pattern, sourceParameters, isTreeModelDataAllowedToBeCaptured));
+    final List<TreePattern> optimized = new ArrayList<>();
+    // Determine coverage using base paths
+    for (int i = 0; i < patterns.size(); i++) {
+      final TreePattern current = patterns.get(i);
+      boolean isCoveredByOther = false;
+
+      for (int j = 0; j < patterns.size(); j++) {
+        if (i == j) {
+          continue;
+        }
+        final TreePattern other = patterns.get(j);
+
+        // If 'other' covers 'current', 'current' is redundant.
+        // Note: if they mutually cover each other (duplicates), we must 
ensure we keep one.
+        // We use index comparison to break ties for exact duplicates.
+        if (covers(other, current)) {
+          if (covers(current, other)) {
+            // Both cover each other (likely identical). Keep the one with 
smaller index.
+            if (j < i) {
+              isCoveredByOther = true;
+              break;
+            }
+          } else {
+            // Strict coverage
+            isCoveredByOther = true;
+            break;
+          }
+        }
+      }
+
+      if (!isCoveredByOther) {
+        optimized.add(current);
+      }
+    }
+    return optimized;
+  }

Review Comment:
   Yes, We define $N$ as the number of patterns, $L$ as the average length 
(depth) of a path:
   
   1. Algorithm Change: Instead of comparing every pattern against every other 
pattern ($O(N^2)$), we build a Trie ($O(N \cdot L)$).
   2. Sort First: We sort the patterns by "broadness" (shortest length and most 
wildcards first). This ensures that dominating patterns (like root.**) are 
inserted first.
   3. Filter: For each subsequent pattern, we check if it is already "covered" 
by the Trie. If it is, we discard it. If not, we add it to the Trie.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to