This is an automated email from the ASF dual-hosted git repository.
fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 5aa3c1b4dd oak-search-elastic: field values should be deduplicated
(#1006)
5aa3c1b4dd is described below
commit 5aa3c1b4dd2fbbf3f74adfc572e1318922442d8d
Author: Fabrizio Fortino <[email protected]>
AuthorDate: Tue Jun 27 22:44:19 2023 +0200
oak-search-elastic: field values should be deduplicated (#1006)
---
.../index/elastic/index/ElasticDocument.java | 10 ++++-----
.../index/elastic/ElasticAbstractQueryTest.java | 15 ++++++++++++++
.../plugins/index/elastic/ElasticContentTest.java | 24 ++++++++++++++++++++++
3 files changed, 43 insertions(+), 6 deletions(-)
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
index f90b86d211..c81d2c28d7 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
@@ -28,11 +28,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
-import java.util.ArrayList;
import java.util.LinkedHashSet;
import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils.toDoubles;
@@ -44,7 +42,7 @@ public class ElasticDocument {
private final Set<String> fulltext;
private final Set<String> suggest;
private final Set<String> spellcheck;
- private final Map<String, List<Object>> properties;
+ private final Map<String, Set<Object>> properties;
private final Map<String, Object> similarityFields;
private final Map<String, Map<String, Double>> dynamicBoostFields;
private final Set<String> similarityTags;
@@ -81,7 +79,7 @@ public class ElasticDocument {
// ref: https://www.elastic.co/blog/strings-are-dead-long-live-strings
// (interpretation of date etc:
https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-field-mapping.html)
void addProperty(String fieldName, Object value) {
- properties.computeIfAbsent(fieldName, s -> new
ArrayList<>()).add(value);
+ properties.computeIfAbsent(fieldName, s -> new
LinkedHashSet<>()).add(value);
}
void addSimilarityField(String name, Blob value) throws IOException {
@@ -146,8 +144,8 @@ public class ElasticDocument {
for (Map.Entry<String, Object> simProp:
similarityFields.entrySet()) {
builder.field(simProp.getKey(), simProp.getValue());
}
- for (Map.Entry<String, List<Object>> prop :
properties.entrySet()) {
- builder.field(prop.getKey(), prop.getValue().size() == 1 ?
prop.getValue().get(0) : prop.getValue());
+ for (Map.Entry<String, Set<Object>> prop :
properties.entrySet()) {
+ builder.field(prop.getKey(), prop.getValue().size() == 1 ?
prop.getValue().iterator().next() : prop.getValue());
}
if (!similarityTags.isEmpty()) {
builder.field(ElasticIndexDefinition.SIMILARITY_TAGS,
similarityTags);
diff --git
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
index 27f1e3cb03..57fd31b002 100644
---
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
+++
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
@@ -16,6 +16,8 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elastic;
+import co.elastic.clients.elasticsearch.core.GetRequest;
+import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.commons.io.FileUtils;
import org.apache.jackrabbit.oak.InitialContent;
import org.apache.jackrabbit.oak.Oak;
@@ -240,6 +242,19 @@ public abstract class ElasticAbstractQueryTest extends
AbstractQueryTest {
}
}
+ protected ObjectNode getDocument(Tree index, String id) {
+ ElasticIndexDefinition esIdxDef = getElasticIndexDefinition(index);
+
+ GetRequest get = GetRequest.of(r -> r
+ .index(esIdxDef.getIndexAlias())
+ .id(id));
+ try {
+ return esConnection.getClient().get(get,
ObjectNode.class).source();
+ } catch (ElasticsearchException | IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
private ElasticIndexDefinition getElasticIndexDefinition(Tree index) {
return new ElasticIndexDefinition(
nodeStore.getRoot(),
diff --git
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java
index 60e757389d..52e0c96ad0 100644
---
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java
+++
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elastic;
+import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import
org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
@@ -258,4 +259,27 @@ public class ElasticContentTest extends
ElasticAbstractQueryTest {
assertEventually(() -> assertQuery("select [jcr:path] from [nt:base]
where [a] = 'text'", results));
}
+
+ @Test
+ public void deduplicateFields() throws Exception {
+ IndexDefinitionBuilder builder = createIndex("a").noAsync();
+ builder.indexRule("nt:base").property("a").propertyIndex();
+ Tree index = setIndex(UUID.randomUUID().toString(), builder);
+ root.commit();
+
+ Tree content = root.getTree("/").addChild("content");
+ content.addChild("indexed1").setProperty("a", List.of("foo", "foo"),
Type.STRINGS);
+ content.addChild("indexed2").setProperty("a", List.of("foo", "bar",
"foo"), Type.STRINGS);
+ root.commit();
+
+ assertEventually(() -> {
+ ObjectNode indexed1 = getDocument(index, "/content/indexed1");
+ assertThat(indexed1.get("a").asText(), equalTo("foo"));
+
+ ObjectNode indexed2 = getDocument(index, "/content/indexed2");
+ assertThat(indexed2.get("a").size(), equalTo(2));
+ assertThat(indexed2.get("a").get(0).asText(), equalTo("foo"));
+ assertThat(indexed2.get("a").get(1).asText(), equalTo("bar"));
+ });
+ }
}