This is an automated email from the ASF dual-hosted git repository.
fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 6c2f14621d OAK-11536: elastic synonym filter has to be lenient to incorrect rules (#2125)
6c2f14621d is described below
commit 6c2f14621da3a7091521fe2594acd6e48c379498
Author: Fabrizio Fortino <[email protected]>
AuthorDate: Mon Mar 3 16:19:17 2025 +0100
OAK-11536: elastic synonym filter has to be lenient to incorrect rules (#2125)
* OAK-11536: elastic synonym filter has to be lenient to incorrect rules
* OAK-11536: improve code reuse in ElasticIndexWriter
* OAK-11536: fix import error after merge
---
.../elastic/index/ElasticCustomAnalyzerMappings.java | 11 ++++++++---
.../plugins/index/elastic/index/ElasticIndexWriter.java | 16 +++-------------
.../oak/plugins/index/FullTextAnalyzerCommonTest.java | 6 +++++-
3 files changed, 16 insertions(+), 17 deletions(-)
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzerMappings.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzerMappings.java
index 97204a5556..640c85e9c9 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzerMappings.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzerMappings.java
@@ -187,9 +187,14 @@ public class ElasticCustomAnalyzerMappings {
reKey.apply(luceneParams, Map.of("mapping", "mappings"))
);
- LUCENE_ELASTIC_TRANSFORMERS.put(SynonymFilterFactory.class,
luceneParams ->
- reKey.apply(luceneParams, Map.of("tokenizerFactory",
"tokenizer"))
- );
+ LUCENE_ELASTIC_TRANSFORMERS.put(SynonymFilterFactory.class,
luceneParams -> {
+ // lucene does not support this option (see UNSUPPORTED_LUCENE_PARAMETERS) and it's lenient by default
+ // elastic is not lenient by default, so we need to set it to true in case it's not present
+ if (!luceneParams.containsKey("lenient")) {
+ luceneParams.put("lenient", "true");
+ }
+ return reKey.apply(luceneParams, Map.of("tokenizerFactory",
"tokenizer"));
+ });
LUCENE_ELASTIC_TRANSFORMERS.put(KeywordMarkerFilterFactory.class,
luceneParams ->
reKey.apply(luceneParams, Map.of("protected", "keywords"))
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
index bb9149c0fd..59d9ba3873 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
@@ -16,7 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elastic.index;
-import co.elastic.clients.elasticsearch._types.AcknowledgedResponseBase;
+import co.elastic.clients.elasticsearch._types.AcknowledgedResponse;
import co.elastic.clients.elasticsearch._types.ElasticsearchException;
import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
import co.elastic.clients.elasticsearch.indices.CreateIndexResponse;
@@ -186,11 +186,7 @@ class ElasticIndexWriter implements
FulltextIndexWriter<ElasticDocument> {
}
final CreateIndexRequest request =
ElasticIndexHelper.createIndexRequest(indexName, indexDefinition);
- if (LOG.isDebugEnabled()) {
- StringBuilder sb = new StringBuilder();
- JsonpUtils.toString(request, sb);
- LOG.debug("Creating Index with request {}", sb);
- }
+ LOG.debug("Creating Index with request {}", request);
// create the new index
try {
final CreateIndexResponse response = esClient.create(request);
@@ -268,13 +264,7 @@ class ElasticIndexWriter implements
FulltextIndexWriter<ElasticDocument> {
deleteOldIndices(client, aliasResponse.result().keySet());
}
- private void checkResponseAcknowledgement(AcknowledgedResponseBase
response, String exceptionMessage) {
- if (!response.acknowledged()) {
- throw new IllegalStateException(exceptionMessage);
- }
- }
-
- private void checkResponseAcknowledgement(CreateIndexResponse response,
String exceptionMessage) {
+ private void checkResponseAcknowledgement(AcknowledgedResponse response,
String exceptionMessage) {
if (!response.acknowledged()) {
throw new IllegalStateException(exceptionMessage);
}
diff --git
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java
index b052f6268c..d6db2b511d 100644
---
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java
+++
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java
@@ -1005,7 +1005,11 @@ public abstract class FullTextAnalyzerCommonTest extends
AbstractQueryTest {
Tree synFilter = addFilter(filters, "Synonym");
synFilter.setProperty("synonyms", "syn.txt");
synFilter.addChild("syn.txt").addChild(JCR_CONTENT)
- .setProperty(JCR_DATA, "plane, airplane, aircraft\nflies=>scars");
+ .setProperty(JCR_DATA, "plane, airplane, aircraft\n" +
+ "flies=>scars\n" +
+ // this rule is incorrect: "term: + was completely eliminated by analyzer"
+ // by default, the configuration has to be lenient and not fail on such cases
+ "plus,+,addition");
});
Tree content = root.getTree("/").addChild("content");