This is an automated email from the ASF dual-hosted git repository.
nfsantos pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 2ac4819b9b OAK-10671- [Indexing Job] Improve Mongo regex query: remove
condition on non-indexed _path field to speed-up traversal (#1331)
2ac4819b9b is described below
commit 2ac4819b9b59019fff0f574be982e43b6f51d82d
Author: Nuno Santos <[email protected]>
AuthorDate: Thu Feb 29 11:42:18 2024 +0100
OAK-10671- [Indexing Job] Improve Mongo regex query: remove condition on
non-indexed _path field to speed-up traversal (#1331)
* Change filter on Mongo to apply conditions only on the _modified and _id
fields, so that the filter condition can be evaluated only with the contents of
an index on (_modified, _id).
---
.../pipelined/PipelinedMongoDownloadTask.java | 102 ++++++--------
.../document/flatfile/pipelined/PipelinedIT.java | 34 +++--
.../pipelined/PipelinedMongoDownloadTaskTest.java | 147 +++++----------------
3 files changed, 99 insertions(+), 184 deletions(-)
diff --git
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTask.java
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTask.java
index aeac8ba348..d9276d9a69 100644
---
a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTask.java
+++
b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTask.java
@@ -147,15 +147,29 @@ public class PipelinedMongoDownloadTask implements
Callable<PipelinedMongoDownlo
* @param mongoFilterPaths The paths to be included/excluded in
the filter. These define subtrees to be included or excluded.
* (see {@link MongoFilterPaths} for
details)
* @param customExcludeEntriesRegex Documents with paths matching this
regex are excluded from download
- * @param queryUsesIndexTraversal Whether the query will use an index to
traverse the documents.
* @return The filter to be used in the Mongo query, or null if no filter
is required
*/
- static Bson computeMongoQueryFilter(@NotNull MongoFilterPaths
mongoFilterPaths, String customExcludeEntriesRegex, boolean
queryUsesIndexTraversal) {
+ static Bson computeMongoQueryFilter(@NotNull MongoFilterPaths
mongoFilterPaths, String customExcludeEntriesRegex) {
var filters = new ArrayList<Bson>();
- Bson includedFilter = descendantsFilter(mongoFilterPaths.included,
queryUsesIndexTraversal);
- if (includedFilter != null) {
- filters.add(includedFilter);
+ List<Pattern> includedPatterns =
toFilterPatterns(mongoFilterPaths.included);
+ if (!includedPatterns.isEmpty()) {
+ // The conditions above on the _id field are not enough to match
all JCR nodes in the given paths because nodes
+ // with paths longer than a certain threshold are represented by
Mongo documents where the _id field is replaced
+ // by a hash and the full path is stored in an additional field
_path. To retrieve these long path documents,
+ // we could add a condition on the _path field, but this would
slow down substantially scanning the DB, because
+ // the _path field is not part of the index used by this query
(it's an index on _modified, _id). Therefore,
+ // Mongo would have to retrieve every document from the column
store to evaluate the filter condition. So instead
+ // we add below a condition to download all the long path
documents. These documents can be identified by the
+ // format of the _id field (<n>:h<hash>), so it is possible to
identify them using only the index.
+ // This might download documents for nodes that are not in the
included paths, but those documents will anyway
+ // be filtered in the transform stage. And in most repositories,
the number of long path documents is very small,
+ // often there are none, so the extra documents downloaded will
not slow down the download by much. However, the
+ // performance gains of evaluating the filter of the query using
only the index are very significant, especially
+ // when the index requires only a small number of nodes.
+ var patternsWithLongPathInclude = new
ArrayList<>(includedPatterns);
+ patternsWithLongPathInclude.add(LONG_PATH_ID_PATTERN);
+ filters.add(Filters.in(NodeDocument.ID,
patternsWithLongPathInclude));
}
// The Mongo filter returned here will download the top level path of
each excluded subtree, which in theory
@@ -164,15 +178,13 @@ public class PipelinedMongoDownloadTask implements
Callable<PipelinedMongoDownlo
// This is done because excluding also the top level path would add
extra complexity to the filter and
// would not have any measurable impact on performance because it only
downloads a few extra documents, one
// for each excluded subtree. The transform stage will anyway filter
out these paths.
- Bson excludedFilter = descendantsFilter(mongoFilterPaths.excluded,
queryUsesIndexTraversal);
- if (excludedFilter != null) {
- filters.add(Filters.nor(excludedFilter));
- }
-
+ ArrayList<Pattern> excludedPatterns = new ArrayList<>();
+ excludedPatterns.addAll(toFilterPatterns(mongoFilterPaths.excluded));
// Custom regex filter to exclude paths
- Bson customExcludedPathsFilter =
createCustomExcludedEntriesFilter(customExcludeEntriesRegex,
queryUsesIndexTraversal);
- if (customExcludedPathsFilter != null) {
- filters.add(customExcludedPathsFilter);
+
excludedPatterns.addAll(customExcludedPatterns(customExcludeEntriesRegex));
+
+ if (!excludedPatterns.isEmpty()) {
+ filters.add(Filters.nin(NodeDocument.ID, excludedPatterns));
}
if (filters.isEmpty()) {
@@ -184,65 +196,31 @@ public class PipelinedMongoDownloadTask implements
Callable<PipelinedMongoDownlo
}
}
- static Bson createCustomExcludedEntriesFilter(String customRegexPattern,
boolean queryUsesIndexTraversal) {
- if (customRegexPattern == null || customRegexPattern.trim().isEmpty())
{
- LOG.info("Mongo custom regex is disabled");
- return null;
- } else {
- LOG.info("Excluding nodes with paths matching regex: {}",
customRegexPattern);
- var pattern = Pattern.compile(customRegexPattern);
- Bson pathFilter = createPathFilter(List.of(pattern),
queryUsesIndexTraversal);
- return Filters.nor(Filters.regex(NodeDocument.ID, pattern),
pathFilter);
- }
- }
-
- private static Bson descendantsFilter(List<String> paths, boolean
queryUsesIndexTraversal) {
+ private static List<Pattern> toFilterPatterns(List<String> paths) {
if (paths.isEmpty()) {
- return null;
+ return List.of();
}
if (paths.size() == 1 && paths.get(0).equals("/")) {
- return null;
+ return List.of();
}
-
- // The filter for descendants of a list of paths is a series of or
conditions. For each path, we have to build
- // two conditions in two different fields of the documents:
- // _ _id - for non-long paths - In this case, the _id is of the form
"2:/foo/bar"
- // _ _path - for long paths - In this case, the _id is a hash and the
document contains an additional _path
- // field with the path of the document.
- // We use the $in operator with a regular expression to match the
paths.
- //
https://www.mongodb.com/docs/manual/reference/operator/query/in/#use-the--in-operator-with-a-regular-expression
- ArrayList<Pattern> pathPatterns = new ArrayList<>();
- ArrayList<Pattern> idPatterns = new ArrayList<>();
-
+ ArrayList<Pattern> patterns = new ArrayList<>();
for (String path : paths) {
if (!path.endsWith("/")) {
path = path + "/";
}
String quotedPath = Pattern.quote(path);
- idPatterns.add(Pattern.compile("^[0-9]{1,3}:" + quotedPath +
".*$"));
- pathPatterns.add(Pattern.compile("^" + quotedPath + ".*$"));
+ patterns.add(Pattern.compile("^[0-9]{1,3}:" + quotedPath + ".*$"));
}
-
- Bson pathFilter = createPathFilter(pathPatterns,
queryUsesIndexTraversal);
- return Filters.or(Filters.in(NodeDocument.ID, idPatterns), pathFilter);
+ return patterns;
}
- private static Bson createPathFilter(List<Pattern> pattern, boolean
queryUsesIndexTraversal) {
- // If a document has a long path, the _id is replaced by a hash and
the path is stored in an additional _path field.
- // When doing an index scan, it may be more efficient to check that
the _id is in the format of a long path id
- // (that is, numeric prefix followed by ":h") first, before checking
the _path field. The _id
- // is available from the index while the _path field is only available
on the document itself, so checking the
- // _path will force an expensive retrieval of the full document. It is
not guaranteed that Mongo will implement
- // this optimization, but it is adding this additional check to allow
MongoDB to apply this optimization.
- // If the query does a column scan, then Mongo retrieves the full
document from the column store, so we can
- // check the _path directly, which simplifies a bit the query.
- if (queryUsesIndexTraversal) {
- return Filters.and(
- Filters.regex(NodeDocument.ID, LONG_PATH_ID_PATTERN),
- Filters.in(NodeDocument.PATH, pattern)
- );
+ static List<Pattern> customExcludedPatterns(String customRegexPattern) {
+ if (customRegexPattern == null || customRegexPattern.trim().isEmpty())
{
+ LOG.info("Mongo custom regex is disabled");
+ return List.of();
} else {
- return Filters.in(NodeDocument.PATH, pattern);
+ LOG.info("Excluding nodes with paths matching regex: {}",
customRegexPattern);
+ return List.of(Pattern.compile(customRegexPattern));
}
}
@@ -387,7 +365,7 @@ public class PipelinedMongoDownloadTask implements
Callable<PipelinedMongoDownlo
.build();
MetricsUtils.addMetric(statisticsProvider, reporter,
PipelinedMetrics.OAK_INDEXER_PIPELINED_MONGO_DOWNLOAD_DURATION_SECONDS,
durationMillis / 1000);
MetricsUtils.addMetric(statisticsProvider, reporter,
PipelinedMetrics.OAK_INDEXER_PIPELINED_DOCUMENTS_DOWNLOADED_TOTAL,
documentsDownloadedTotal);
- MetricsUtils.addMetric(statisticsProvider, reporter,
PipelinedMetrics.OAK_INDEXER_PIPELINED_MONGO_DOWNLOAD_ENQUEUE_DELAY_PERCENTAGE,
+ MetricsUtils.addMetric(statisticsProvider, reporter,
PipelinedMetrics.OAK_INDEXER_PIPELINED_MONGO_DOWNLOAD_ENQUEUE_DELAY_PERCENTAGE,
PipelinedUtils.toPercentage(totalEnqueueWaitTimeMillis,
durationMillis)
);
MetricsUtils.addMetricByteSize(statisticsProvider, reporter,
PipelinedMetrics.OAK_INDEXER_PIPELINED_DOCUMENTS_DOWNLOADED_TOTAL_BYTES,
@@ -421,7 +399,7 @@ public class PipelinedMongoDownloadTask implements
Callable<PipelinedMongoDownlo
// That is, download "/", "/content", "/content/dam" for a base path
of "/content/dam". These nodes will not be
// matched by the regex used in the Mongo query, which assumes a
prefix of "???:/content/dam"
MongoFilterPaths mongoFilterPaths = getPathsForRegexFiltering();
- Bson mongoFilter = computeMongoQueryFilter(mongoFilterPaths,
customExcludeEntriesRegex, true);
+ Bson mongoFilter = computeMongoQueryFilter(mongoFilterPaths,
customExcludeEntriesRegex);
if (mongoFilter == null) {
LOG.info("Downloading full repository");
} else {
@@ -516,7 +494,7 @@ public class PipelinedMongoDownloadTask implements
Callable<PipelinedMongoDownlo
// We are downloading potentially a large fraction of the repository,
so using an index scan will be
// inefficient. So we pass the natural hint to force MongoDB to use
natural ordering, that is, column scan
MongoFilterPaths mongoFilterPaths = getPathsForRegexFiltering();
- Bson mongoFilter = computeMongoQueryFilter(mongoFilterPaths,
customExcludeEntriesRegex, false);
+ Bson mongoFilter = computeMongoQueryFilter(mongoFilterPaths,
customExcludeEntriesRegex);
if (mongoFilter == null) {
LOG.info("Downloading full repository from Mongo with natural
order");
FindIterable<NodeDocument> mongoIterable = dbCollection
diff --git
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedIT.java
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedIT.java
index b7f0f7811d..30ab0a3d8a 100644
---
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedIT.java
+++
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedIT.java
@@ -32,6 +32,7 @@ import org.apache.jackrabbit.oak.plugins.document.MongoUtils;
import org.apache.jackrabbit.oak.plugins.document.RevisionVector;
import org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore;
import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
+import org.apache.jackrabbit.oak.plugins.document.util.Utils;
import org.apache.jackrabbit.oak.plugins.index.ConsoleIndexingReporter;
import org.apache.jackrabbit.oak.plugins.metric.MetricStatisticsProvider;
import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
@@ -186,7 +187,7 @@ public class PipelinedIT {
"/content/dam/2023|{\"p2\":\"v2023\"}",
"/content/dam/2023/01|{\"p1\":\"v202301\"}",
"/content/dam/2023/02|{}"
- ));
+ ), true);
}
@Test
@@ -214,7 +215,7 @@ public class PipelinedIT {
"/content/dam/2022/02|{\"p1\":\"v202202\"}",
"/content/dam/2022/03|{\"p1\":\"v202203\"}",
"/content/dam/2022/04|{\"p1\":\"v202204\"}"
- ));
+ ), true);
}
@@ -234,7 +235,7 @@ public class PipelinedIT {
"/etc|{}",
"/home|{}",
"/jcr:system|{}"
- ));
+ ), true);
}
@Test
@@ -253,7 +254,7 @@ public class PipelinedIT {
"/etc|{}",
"/home|{}",
"/jcr:system|{}"
- ));
+ ), true);
}
@Test
@@ -283,8 +284,7 @@ public class PipelinedIT {
"/content/dam/2022/02/04|{\"p1\":\"v20220204\"}",
"/content/dam/2022/03|{\"p1\":\"v202203\"}",
"/content/dam/2022/04|{\"p1\":\"v202204\"}"
-
- ));
+ ), true);
}
@Test
@@ -305,7 +305,7 @@ public class PipelinedIT {
"/content/dam/2023/01|{\"p1\":\"v202301\"}",
"/content/dam/2023/02|{}",
"/content/dam/2023/02/28|{\"p1\":\"v20230228\"}"
- ));
+ ), true);
}
@Test
@@ -344,7 +344,7 @@ public class PipelinedIT {
// The list above has the longest paths first, reverse it to match the
order in the FFS
Collections.reverse(expected);
- testSuccessfulDownload(pathPredicate, pathFilters, expected);
+ testSuccessfulDownload(pathPredicate, pathFilters, expected, false);
}
@@ -454,10 +454,10 @@ public class PipelinedIT {
private void testSuccessfulDownload(Predicate<String> pathPredicate,
List<PathFilter> pathFilters)
throws CommitFailedException, IOException {
- testSuccessfulDownload(pathPredicate, pathFilters, EXPECTED_FFS);
+ testSuccessfulDownload(pathPredicate, pathFilters, EXPECTED_FFS,
false);
}
- private void testSuccessfulDownload(Predicate<String> pathPredicate,
List<PathFilter> pathFilters, List<String> expected)
+ private void testSuccessfulDownload(Predicate<String> pathPredicate,
List<PathFilter> pathFilters, List<String> expected, boolean ignoreLongPaths)
throws CommitFailedException, IOException {
Backend rwStore = createNodeStore(false);
createContent(rwStore.documentNodeStore);
@@ -468,7 +468,19 @@ public class PipelinedIT {
File file = pipelinedStrategy.createSortedStoreFile();
assertTrue(file.exists());
- assertEquals(expected, Files.readAllLines(file.toPath()));
+ List<String> result = Files.readAllLines(file.toPath());
+ if (ignoreLongPaths) {
+ // Remove the long paths from the result. The filter on Mongo is
best-effort: it will download long path
+ // documents even if they do not match the includedPaths.
+ result = result.stream()
+ .filter(s -> {
+ var name = s.split("\\|")[0];
+ return name.length() < Utils.PATH_LONG;
+ })
+ .collect(Collectors.toList());
+
+ }
+ assertEquals(expected, result);
assertMetrics();
}
diff --git
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTaskTest.java
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTaskTest.java
index 46060b7985..3e87830560 100644
---
a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTaskTest.java
+++
b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedMongoDownloadTaskTest.java
@@ -51,8 +51,10 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
+import static
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
@@ -297,22 +299,14 @@ public class PipelinedMongoDownloadTaskTest {
@Test
public void createCustomExcludeEntriesFilter() {
-
assertNull(PipelinedMongoDownloadTask.createCustomExcludedEntriesFilter(null,
true));
-
assertNull(PipelinedMongoDownloadTask.createCustomExcludedEntriesFilter("",
true));
+
assertTrue(PipelinedMongoDownloadTask.customExcludedPatterns(null).isEmpty());
+
assertTrue(PipelinedMongoDownloadTask.customExcludedPatterns("").isEmpty());
Pattern p = Pattern.compile("^[0-9]{1,3}:/a/b.*$");
- var expectedBson = Filters.nor(
- Filters.regex(NodeDocument.ID, p),
- Filters.and(
- Filters.regex(NodeDocument.ID,
PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN),
- Filters.in(NodeDocument.PATH, p)
- )
- );
+ var actualListOfPatterns =
PipelinedMongoDownloadTask.customExcludedPatterns("^[0-9]{1,3}:/a/b.*$");
+ assertEquals(1, actualListOfPatterns.size());
-
- var actualBson =
PipelinedMongoDownloadTask.createCustomExcludedEntriesFilter("^[0-9]{1,3}:/a/b.*$",
true);
-
- assertBsonEquals(expectedBson, actualBson);
+ assertEquals(p.toString(), actualListOfPatterns.get(0).toString());
}
@Test
@@ -321,8 +315,7 @@ public class PipelinedMongoDownloadTaskTest {
assertNull(
PipelinedMongoDownloadTask.computeMongoQueryFilter(
MongoFilterPaths.DOWNLOAD_ALL,
- null,
- true
+ null
)
);
}
@@ -332,23 +325,14 @@ public class PipelinedMongoDownloadTaskTest {
// Path filter but no exclude filter
var actual = PipelinedMongoDownloadTask.computeMongoQueryFilter(
new MongoFilterPaths(List.of("/"), List.of("/excluded1",
"/content/excluded2")),
- null,
- true
+ null
);
// The generated filter should not include any condition to include
the descendants of /
- var expected = Filters.nor(
- Filters.or(
- Filters.in(NodeDocument.ID,
- Pattern.compile("^[0-9]{1,3}:" +
Pattern.quote("/excluded1/") + ".*$"),
- Pattern.compile("^[0-9]{1,3}:" +
Pattern.quote("/content/excluded2/") + ".*$")),
- Filters.and(
- Filters.regex(NodeDocument.ID,
PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN),
- Filters.in(NodeDocument.PATH,
- Pattern.compile("^" +
Pattern.quote("/excluded1/") + ".*$"),
- Pattern.compile("^" +
Pattern.quote("/content/excluded2/") + ".*$"))
- )
- )
- );
+ var expected =
+ Filters.nin(NodeDocument.ID,
+ Pattern.compile("^[0-9]{1,3}:" +
Pattern.quote("/excluded1/") + ".*$"),
+ Pattern.compile("^[0-9]{1,3}:" +
Pattern.quote("/content/excluded2/") + ".*$")
+ );
assertBsonEquals(expected, actual);
}
@@ -358,15 +342,11 @@ public class PipelinedMongoDownloadTaskTest {
// Path filter but no exclude filter
var actual = PipelinedMongoDownloadTask.computeMongoQueryFilter(
new MongoFilterPaths(List.of("/parent"), List.of()),
- null,
- true
+ null
);
- var expected = Filters.or(
- Filters.in(NodeDocument.ID, Pattern.compile("^[0-9]{1,3}:" +
Pattern.quote("/parent/") + ".*$")),
- Filters.and(
- Filters.in(NodeDocument.PATH, Pattern.compile("^" +
Pattern.quote("/parent/") + ".*$")),
- Filters.regex(NodeDocument.ID,
PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN)
- )
+ var expected = Filters.in(NodeDocument.ID,
+ Pattern.compile("^[0-9]{1,3}:" + Pattern.quote("/parent/") +
".*$"),
+ LONG_PATH_ID_PATTERN
);
assertBsonEquals(expected, actual);
}
@@ -375,47 +355,24 @@ public class PipelinedMongoDownloadTaskTest {
public void computeMongoQueryFilterNoPathFilterWithExcludeFilter() {
var actual = PipelinedMongoDownloadTask.computeMongoQueryFilter(
MongoFilterPaths.DOWNLOAD_ALL,
- "^[0-9]{1,3}:/a/b.*$",
- true
- );
- Pattern excludePattern = Pattern.compile("^[0-9]{1,3}:/a/b.*$");
- assertBsonEquals(
- Filters.nor(
- Filters.regex(NodeDocument.ID, excludePattern),
- Filters.and(
- Filters.regex(NodeDocument.ID,
PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN),
- Filters.in(NodeDocument.PATH, excludePattern)
- )
- ),
- actual
+ "^[0-9]{1,3}:/a/b.*$"
);
+ Bson expectedFilter = Filters.nin(NodeDocument.ID,
Pattern.compile("^[0-9]{1,3}:/a/b.*$"));
+ assertBsonEquals(expectedFilter, actual);
}
@Test
public void computeMongoQueryFilterWithPathFilterWithExcludeFilter() {
var actual = PipelinedMongoDownloadTask.computeMongoQueryFilter(
new MongoFilterPaths(List.of("/parent"), List.of()),
- "^[0-9]{1,3}:/a/b.*$",
- true
+ "^[0-9]{1,3}:/a/b.*$"
);
Pattern excludesPattern = Pattern.compile("^[0-9]{1,3}:/a/b.*$");
var expected =
Filters.and(
- Filters.or(
- Filters.in(NodeDocument.ID,
Pattern.compile("^[0-9]{1,3}:" + Pattern.quote("/parent/") + ".*$")),
- Filters.and(
- Filters.regex(NodeDocument.ID,
PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN),
- Filters.in(NodeDocument.PATH,
Pattern.compile("^" + Pattern.quote("/parent/") + ".*$"))
- )
- ),
- Filters.nor(
- Filters.regex(NodeDocument.ID,
excludesPattern),
- Filters.and(
- Filters.regex(NodeDocument.ID,
PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN),
- Filters.in(NodeDocument.PATH,
excludesPattern)
- )
- )
+ Filters.in(NodeDocument.ID,
Pattern.compile("^[0-9]{1,3}:" + Pattern.quote("/parent/") + ".*$"),
LONG_PATH_ID_PATTERN),
+ Filters.nin(NodeDocument.ID, excludesPattern)
);
assertBsonEquals(expected, actual);
}
@@ -424,21 +381,17 @@ public class PipelinedMongoDownloadTaskTest {
public void
computeMongoQueryFilterWithPathFilterWithExcludeFilterAndNaturalOrderTraversal()
{
var actual = PipelinedMongoDownloadTask.computeMongoQueryFilter(
new MongoFilterPaths(List.of("/parent"), List.of()),
- "^[0-9]{1,3}:/a/b.*$",
- false
+ "^[0-9]{1,3}:/a/b.*$"
);
Pattern excludePattern = Pattern.compile("^[0-9]{1,3}:/a/b.*$");
var expected =
Filters.and(
- Filters.or(
- Filters.in(NodeDocument.ID,
Pattern.compile("^[0-9]{1,3}:" + Pattern.quote("/parent/") + ".*$")),
- Filters.in(NodeDocument.PATH,
Pattern.compile("^" + Pattern.quote("/parent/") + ".*$"))
+ Filters.in(NodeDocument.ID,
+ Pattern.compile("^[0-9]{1,3}:" +
Pattern.quote("/parent/") + ".*$"),
+ LONG_PATH_ID_PATTERN
),
- Filters.nor(
- Filters.regex(NodeDocument.ID, excludePattern),
- Filters.in(NodeDocument.PATH, excludePattern)
- )
+ Filters.nin(NodeDocument.ID, excludePattern)
);
assertBsonEquals(expected, actual);
}
@@ -447,24 +400,12 @@ public class PipelinedMongoDownloadTaskTest {
public void
computeMongoQueryFilterWithPathFilterWithExcludeFilterAndNaturalColumnTraversal()
{
var actual = PipelinedMongoDownloadTask.computeMongoQueryFilter(
new MongoFilterPaths(List.of("/"), List.of("/excluded")),
- "^[0-9]{1,3}:/a/b.*$",
- false
+ "^[0-9]{1,3}:/a/b.*$"
);
Pattern excludePattern = Pattern.compile("^[0-9]{1,3}:/a/b.*$");
var expected =
- Filters.and(
- Filters.nor(
- Filters.or(
- Filters.in(NodeDocument.ID,
Pattern.compile("^[0-9]{1,3}:" + Pattern.quote("/excluded/") + ".*$")),
- Filters.in(NodeDocument.PATH,
Pattern.compile("^" + Pattern.quote("/excluded/") + ".*$"))
- )
- ),
- Filters.nor(
- Filters.regex(NodeDocument.ID, excludePattern),
- Filters.in(NodeDocument.PATH, excludePattern)
- )
- );
+ Filters.nin(NodeDocument.ID, Pattern.compile("^[0-9]{1,3}:" +
Pattern.quote("/excluded/") + ".*$"), excludePattern);
assertBsonEquals(expected, actual);
}
@@ -472,30 +413,14 @@ public class PipelinedMongoDownloadTaskTest {
public void
computeMongoQueryFilterWithPathFilterWithExcludeFilterAndNaturalIndexTraversal()
{
var actual = PipelinedMongoDownloadTask.computeMongoQueryFilter(
new MongoFilterPaths(List.of("/"), List.of("/excluded")),
- "^[0-9]{1,3}:/a/b.*$",
- true
+ "^[0-9]{1,3}:/a/b.*$"
);
Pattern excludePattern = Pattern.compile("^[0-9]{1,3}:/a/b.*$");
- var expected =
- Filters.and(
- Filters.nor(
- Filters.or(
- Filters.in(NodeDocument.ID,
Pattern.compile("^[0-9]{1,3}:" + Pattern.quote("/excluded/") + ".*$")),
- Filters.and(
- Filters.regex(NodeDocument.ID,
PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN),
- Filters.in(NodeDocument.PATH,
Pattern.compile("^" + Pattern.quote("/excluded/") + ".*$"))
- )
- )
- ),
- Filters.nor(
- Filters.regex(NodeDocument.ID, excludePattern),
- Filters.and(
- Filters.regex(NodeDocument.ID,
PipelinedMongoDownloadTask.LONG_PATH_ID_PATTERN),
- Filters.in(NodeDocument.PATH,
excludePattern)
- )
- )
- );
+ var expected = Filters.nin(NodeDocument.ID,
+ Pattern.compile("^[0-9]{1,3}:" + Pattern.quote("/excluded/") +
".*$"),
+ excludePattern
+ );
assertBsonEquals(expected, actual);
}