nfsantos commented on code in PR #1249:
URL: https://github.com/apache/jackrabbit-oak/pull/1249#discussion_r1426732371
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTask.java:
##########
@@ -353,63 +354,101 @@ private void downloadWithNaturalOrdering() throws InterruptedException, TimeoutE
}
}
- private String getPathForRegexFiltering() {
+ private Set<String> getPathsForRegexFiltering() {
if (!regexPathFiltering) {
LOG.info("Regex path filtering disabled.");
- return null;
+ return Set.of();
}
- return getSingleIncludedPath(pathFilters);
+ return extractIncludedPaths(pathFilters);
}
+ /**
+ * Aggregates the included paths from the path filters. The final list will not contain duplicates or overlapping
+ * paths (i.e., /a and /a/b).
+ *
+ * @param pathFilters Empty set if path filtering should be disabled, otherwise the paths that should be included
+ * in the Mongo query filters
+ */
// Package private for testing
- static String getSingleIncludedPath(List<PathFilter> pathFilters) {
- // For the time being, we only enable path filtering if there is a single include path across all indexes and no
- // exclude paths. This is the case for most of the larger indexes. We can consider generalizing this in the future.
- LOG.info("Creating regex filter from pathFilters: " + pathFilters);
+ static Set<String> extractIncludedPaths(List<PathFilter> pathFilters) {
+ // Path filtering is enabled only if there are no excludedPaths.
if (pathFilters == null) {
- return null;
+ return Set.of();
+ }
+ Set<String> includedPaths = new HashSet<>();
+ Set<String> excludedPaths = new HashSet<>();
+ for (PathFilter pathFilter : pathFilters) {
+ includedPaths.addAll(pathFilter.getIncludedPaths());
+ excludedPaths.addAll(pathFilter.getExcludedPaths());
+ }
+ // Sort by length to make it easier to compute the common ancestors of include paths
Review Comment:
Corrected.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]