This is an automated email from the ASF dual-hosted git repository. mkataria pushed a commit to branch OAK-11719 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 6fa9f0c090c5cef57c7971eb07c830bee6ce71c0 Author: Mohit Kataria <tiho...@gmail.com> AuthorDate: Wed May 14 04:58:50 2025 +0530 OAK-11719: vector query should not break normal queries --- .../oak/query/ast/FullTextSearchImpl.java | 7 +- .../plugins/index/lucene/LucenePropertyIndex.java | 9 +++ .../index/lucene/LuceneFullTextIndexTest.java | 5 ++ .../index/lucene/LuceneTestRepositoryBuilder.java | 2 + .../index/elastic/ElasticFullTextIndexTest.java | 5 ++ .../oak/plugins/index/FullTextIndexCommonTest.java | 81 +++++++++++++--------- 6 files changed, 73 insertions(+), 36 deletions(-) diff --git a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java index 2f5c88c57c..288afa54c8 100644 --- a/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java +++ b/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/FullTextSearchImpl.java @@ -144,10 +144,9 @@ public class FullTextSearchImpl extends ConstraintImpl { // e.g. ?{"inferenceModelConfig": "ada-test-model"}?little red fox // So here we split the query into text part of query and inferenceConfig part of query. // Afterwards we only parse text part of query as this part of query is what we want to search. - if (query.getSettings().isInferenceEnabled()) { - VectorQuery vectorQuery = new VectorQuery(rawText); - queryText = vectorQuery.getQueryText(); - } + // We are explicitly removing vectorQueryConfig from raw query + VectorQuery vectorQuery = new VectorQuery(rawText); + queryText = vectorQuery.getQueryText(); FullTextExpression e = FullTextParser.parse(p2, queryText); return new FullTextContains(p2, rawText, e); } catch (ParseException e) { diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java index be0bcb19fe..51be661b9f 100644 --- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java +++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java @@ -83,6 +83,7 @@ import org.apache.jackrabbit.oak.spi.query.Filter; import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction; import org.apache.jackrabbit.oak.spi.query.QueryConstants; import org.apache.jackrabbit.oak.spi.query.QueryLimits; +import org.apache.jackrabbit.oak.spi.query.fulltext.VectorQuery; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.apache.jackrabbit.oak.spi.state.NodeStateUtils; import org.apache.lucene.analysis.Analyzer; @@ -1477,6 +1478,14 @@ public class LucenePropertyIndex extends FulltextIndex { private boolean visitTerm(String propertyName, String text, String boost, boolean not) { String p = getLuceneFieldName(propertyName, pr); + // below condition is for contains(*, 'xyz') + if (propertyName == null) { + // Lucene indexes don't support inference, so we should remove queryInferenceConfig + // from query before evaluating it. + VectorQuery vectorQuery = new VectorQuery(text); + text = vectorQuery.getQueryText(); + } + Query q = tokenToQuery(text, p, pr, analyzer, augmentor); if (q == null) { return false; diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneFullTextIndexTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneFullTextIndexTest.java index fe379982fb..491f2e3d12 100644 --- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneFullTextIndexTest.java +++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneFullTextIndexTest.java @@ -45,6 +45,11 @@ public class LuceneFullTextIndexTest extends FullTextIndexCommonTest { setTraversalEnabled(false); } + @Override + protected String[] getPrefixes() { + return new String[]{"", "?{}?", "?"}; + } + @After public void shutdownExecutor() { executorService.shutdown(); diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneTestRepositoryBuilder.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneTestRepositoryBuilder.java index 48358551ce..21bea835d1 100644 --- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneTestRepositoryBuilder.java +++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneTestRepositoryBuilder.java @@ -58,6 +58,8 @@ public class LuceneTestRepositoryBuilder extends TestRepositoryBuilder { resultCountingIndexProvider = new ResultCountingIndexProvider(indexProvider); queryEngineSettings = new QueryEngineSettings(); + // enabling inference to check impact on all tests. + queryEngineSettings.setInferenceEnabled(true); optionalEditorProvider = new TestUtil.OptionalEditorProvider(); asyncIndexUpdate.setCorruptIndexHandler(trackingCorruptIndexHandler); } diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexTest.java index 457add8e22..4726f33c1f 100644 --- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexTest.java +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexTest.java @@ -70,4 +70,9 @@ public class ElasticFullTextIndexTest extends FullTextIndexCommonTest { }); } + @Override + protected String[] getPrefixes() { + return new String[]{"", "?{}?", "?"}; + } + } diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java index edc24bcf24..ceef6b0af8 100644 --- a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java +++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java @@ -152,13 +152,15 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest { c.setProperty("propb", "Hello folks"); test.addChild("d").setProperty("propb", "baz"); root.commit(); - - assertEventually(() -> { - assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/c", "/test/b", "/test/a"), true, true); - assertQuery("//*[jcr:contains(., 'Hello')] order by @jcr:score ascending", XPATH, + for (String prefix : getPrefixes()) { + assertEventually(() -> { + assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, + List.of("/test/c", "/test/b", "/test/a"), true, true); + assertQuery("//*[jcr:contains(., '" + prefix + "Hello')] order by @jcr:score ascending", XPATH, List.of("/test/a", "/test/b", "/test/c"), true, true); - assertQuery("//*[jcr:contains(., 'people')]", XPATH, List.of("/test/c")); - }); + assertQuery("//*[jcr:contains(., '" + prefix + "people')]", XPATH, List.of("/test/c")); + }); + } } @Test @@ -197,11 +199,13 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest { d.setProperty("b", "world"); root.commit(); - assertEventually(() -> { - assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/nodea", "/test/nodec", "/test/noded")); - assertQuery("//*[jcr:contains(., 'hello world')]", XPATH, List.of("/test/nodec", "/test/noded")); - assertQuery("//*[jcr:contains(., 'hello OR world')]", XPATH, List.of("/test/nodea", "/test/nodeb", "/test/nodec", "/test/noded")); - }); + for (String prefix : getPrefixes()) { + assertEventually(() -> { + assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, List.of("/test/nodea", "/test/nodec", "/test/noded")); + assertQuery("//*[jcr:contains(., '" + prefix + "hello world')]", XPATH, List.of("/test/nodec", "/test/noded")); + assertQuery("//*[jcr:contains(., '" + prefix + "hello OR world')]", XPATH, List.of("/test/nodea", "/test/nodeb", "/test/nodec", "/test/noded")); + }); + } } @Test @@ -224,10 +228,12 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest { d.setProperty("b", "world"); root.commit(); - assertEventually(() -> { - assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/a", "/test/c", "/test/d")); - assertQuery("//*[jcr:contains(., 'hello world')]", XPATH, List.of("/test/c", "/test/d")); - }); + for (String prefix : getPrefixes()) { + assertEventually(() -> { + assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, List.of("/test/a", "/test/c", "/test/d")); + assertQuery("//*[jcr:contains(., '" + prefix + "hello world')]", XPATH, List.of("/test/c", "/test/d")); + }); + } } @Test @@ -250,10 +256,12 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest { d.setProperty("b", "world"); root.commit(); - assertEventually(() -> { - assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/nodea", "/test/nodec", "/test/noded")); - assertQuery("//*[jcr:contains(., 'hello world')]", XPATH, List.of("/test/nodec", "/test/noded")); - }); + for (String prefix : getPrefixes()) { + assertEventually(() -> { + assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, List.of("/test/nodea", "/test/nodec", "/test/noded")); + assertQuery("//*[jcr:contains(., '" + prefix + "hello world')]", XPATH, List.of("/test/nodec", "/test/noded")); + }); + } } /* @@ -280,10 +288,12 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest { d.setProperty("b", "world"); root.commit(); - assertEventually(() -> { - assertQuery("//*[jcr:contains(., 'Hello')]", XPATH, List.of("/test/nodec", "/test/noded")); - assertQuery("//*[jcr:contains(., 'hello world')]", XPATH, List.of("/test/nodec")); - }); + for (String prefix : getPrefixes()) { + assertEventually(() -> { + assertQuery("//*[jcr:contains(., '" + prefix + "Hello')]", XPATH, List.of("/test/nodec", "/test/noded")); + assertQuery("//*[jcr:contains(., '" + prefix + "hello world')]", XPATH, List.of("/test/nodec")); + }); + } } @Test @@ -321,8 +331,10 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest { index.setProperty(FulltextIndexConstants.PROP_REFRESH_DEFN, true); root.commit(); - assertEventually(() -> - assertQuery("//*[jcr:contains(., 'jpg')]", XPATH, List.of("/test/a"))); + for (String prefix : getPrefixes()) { + assertEventually(() -> + assertQuery("//*[jcr:contains(., '" + prefix + "jpg')]", XPATH, List.of("/test/a"))); + } } @Test @@ -346,14 +358,16 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest { root.commit(); - assertEventually(() -> { - assertQuery("//*[jcr:contains(., 'foo')]", XPATH, List.of("/test/a")); - assertQuery("//*[jcr:contains(., '2025')]", XPATH, List.of("/test/b")); - assertQuery("//*[jcr:contains(., '123')]", XPATH, List.of("/test/c")); - assertQuery("//*[jcr:contains(., '456.78')]", XPATH, List.of("/test/d")); - assertQuery("//*[jcr:contains(., 'true')]", XPATH, List.of("/test/e")); + for (String prefix : getPrefixes()) { + assertEventually(() -> { + assertQuery("//*[jcr:contains(., '" + prefix + "foo')]", XPATH, List.of("/test/a")); + assertQuery("//*[jcr:contains(., '" + prefix + "2025')]", XPATH, List.of("/test/b")); + assertQuery("//*[jcr:contains(., '" + prefix + "123')]", XPATH, List.of("/test/c")); + assertQuery("//*[jcr:contains(., '" + prefix + "456.78')]", XPATH, List.of("/test/d")); + assertQuery("//*[jcr:contains(., '" + prefix + "true')]", XPATH, List.of("/test/e")); } - ); + ); + } } protected void assertEventually(Runnable r) { @@ -396,4 +410,7 @@ public abstract class FullTextIndexCommonTest extends AbstractQueryTest { return executeQuery(explain, lang).get(0); } + protected String[] getPrefixes() { + return new String[]{""}; + } }