VGalaxies commented on code in PR #16789:
URL: https://github.com/apache/iotdb/pull/16789#discussion_r2596178493
##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/pipe/datastructure/pattern/TreePattern.java:
##########
@@ -274,65 +286,145 @@ public static TreePattern parsePatternFromString(
}
/**
- * A private helper method to parse a set of 'path' and 'pattern' keys into
a single union
- * TreePattern. This contains the original logic of
parsePipePatternFromSourceParameters.
- *
- * @param sourceParameters The source parameters.
- * @param isTreeModelDataAllowedToBeCaptured Flag for TreePattern
constructor.
- * @param extractorPathKey Key for extractor path (e.g., "extractor.path").
- * @param sourcePathKey Key for source path (e.g., "source.path").
- * @param extractorPatternKey Key for extractor pattern (e.g.,
"extractor.pattern").
- * @param sourcePatternKey Key for source pattern (e.g., "source.pattern").
- * @param defaultPattern The pattern to return if both path and pattern are
null. If this
- * parameter is null, this method returns null.
- * @return The parsed TreePattern, or defaultPattern, or null if
defaultPattern is null and no
- * patterns are specified.
+ * Helper method to parse pattern parameters into a list of patterns without
creating the Union
+ * object immediately.
*/
- private static TreePattern parsePatternUnion(
+ private static List<TreePattern> parsePatternList(
final PipeParameters sourceParameters,
final boolean isTreeModelDataAllowedToBeCaptured,
final String extractorPathKey,
final String sourcePathKey,
final String extractorPatternKey,
- final String sourcePatternKey,
- final TreePattern defaultPattern) {
+ final String sourcePatternKey) {
final String path = sourceParameters.getStringByKeys(extractorPathKey,
sourcePathKey);
final String pattern =
sourceParameters.getStringByKeys(extractorPatternKey, sourcePatternKey);
- // 1. If both "source.path" and "source.pattern" are specified, their
union will be used.
- if (path != null && pattern != null) {
- final List<TreePattern> result = new ArrayList<>();
- // Parse "source.path" as IoTDB-style path.
+ final List<TreePattern> result = new ArrayList<>();
+
+ if (path != null) {
result.addAll(
parseMultiplePatterns(
path, p -> new
IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, p)));
- // Parse "source.pattern" using the helper method.
+ }
+
+ if (pattern != null) {
result.addAll(
parsePatternsFromPatternParameter(
pattern, sourceParameters, isTreeModelDataAllowedToBeCaptured));
- return buildUnionPattern(isTreeModelDataAllowedToBeCaptured, result);
}
- // 2. If only "source.path" is specified, it will be interpreted as an
IoTDB-style path.
- if (path != null) {
- return buildUnionPattern(
- isTreeModelDataAllowedToBeCaptured,
- parseMultiplePatterns(
- path, p -> new
IoTDBTreePattern(isTreeModelDataAllowedToBeCaptured, p)));
+ return result;
+ }
+
+ /**
+ * Removes patterns from the list that are covered by other patterns in the
same list. For
+ * example, if "root.**" and "root.db.**" are present, "root.db.**" is
removed.
+ */
+ private static List<TreePattern> optimizePatterns(final List<TreePattern>
patterns) {
+ if (patterns == null || patterns.isEmpty()) {
+ return new ArrayList<>();
+ }
+ if (patterns.size() == 1) {
+ return patterns;
}
- // 3. If only "source.pattern" is specified, parse it using the helper
method.
- if (pattern != null) {
- return buildUnionPattern(
- isTreeModelDataAllowedToBeCaptured,
- parsePatternsFromPatternParameter(
- pattern, sourceParameters, isTreeModelDataAllowedToBeCaptured));
+ final List<TreePattern> optimized = new ArrayList<>();
+ // Determine coverage using base paths
+ for (int i = 0; i < patterns.size(); i++) {
+ final TreePattern current = patterns.get(i);
+ boolean isCoveredByOther = false;
+
+ for (int j = 0; j < patterns.size(); j++) {
+ if (i == j) {
+ continue;
+ }
+ final TreePattern other = patterns.get(j);
+
+ // If 'other' covers 'current', 'current' is redundant.
+ // Note: if they mutually cover each other (duplicates), we must
ensure we keep one.
+ // We use index comparison to break ties for exact duplicates.
+ if (covers(other, current)) {
+ if (covers(current, other)) {
+ // Both cover each other (likely identical). Keep the one with
smaller index.
+ if (j < i) {
+ isCoveredByOther = true;
+ break;
+ }
+ } else {
+ // Strict coverage
+ isCoveredByOther = true;
+ break;
+ }
+ }
+ }
+
+ if (!isCoveredByOther) {
+ optimized.add(current);
+ }
+ }
+ return optimized;
+ }
Review Comment:
Yes, We define $N$ as the number of patterns, $L$ as the average length
(depth) of a path:
1. Algorithm Change: Instead of comparing every pattern against every other
pattern ($O(N^2)$), we build a Trie ($O(N \cdot L)$).
2. Sort First: We sort the patterns by "broadness" (shortest length and most
wildcards first). This ensures that dominating patterns (like root.**) are
inserted first.
3. Filter: For each subsequent pattern, we check if it is already "covered"
by the Trie. If it is, we discard it. If not, we add it to the Trie.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]