This is an automated email from the ASF dual-hosted git repository.

fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 5aa3c1b4dd oak-search-elastic: field values should be deduplicated 
(#1006)
5aa3c1b4dd is described below

commit 5aa3c1b4dd2fbbf3f74adfc572e1318922442d8d
Author: Fabrizio Fortino <[email protected]>
AuthorDate: Tue Jun 27 22:44:19 2023 +0200

    oak-search-elastic: field values should be deduplicated (#1006)
---
 .../index/elastic/index/ElasticDocument.java       | 10 ++++-----
 .../index/elastic/ElasticAbstractQueryTest.java    | 15 ++++++++++++++
 .../plugins/index/elastic/ElasticContentTest.java  | 24 ++++++++++++++++++++++
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
index f90b86d211..c81d2c28d7 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
@@ -28,11 +28,9 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.List;
 import java.util.Map;
 import java.util.HashMap;
 import java.util.Set;
-import java.util.ArrayList;
 import java.util.LinkedHashSet;
 
 import static 
org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils.toDoubles;
@@ -44,7 +42,7 @@ public class ElasticDocument {
     private final Set<String> fulltext;
     private final Set<String> suggest;
     private final Set<String> spellcheck;
-    private final Map<String, List<Object>> properties;
+    private final Map<String, Set<Object>> properties;
     private final Map<String, Object> similarityFields;
     private final Map<String, Map<String, Double>> dynamicBoostFields;
     private final Set<String> similarityTags;
@@ -81,7 +79,7 @@ public class ElasticDocument {
     // ref: https://www.elastic.co/blog/strings-are-dead-long-live-strings
     // (interpretation of date etc: 
https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-field-mapping.html)
     void addProperty(String fieldName, Object value) {
-        properties.computeIfAbsent(fieldName, s -> new 
ArrayList<>()).add(value);
+        properties.computeIfAbsent(fieldName, s -> new 
LinkedHashSet<>()).add(value);
     }
 
     void addSimilarityField(String name, Blob value) throws IOException {
@@ -146,8 +144,8 @@ public class ElasticDocument {
                 for (Map.Entry<String, Object> simProp: 
similarityFields.entrySet()) {
                     builder.field(simProp.getKey(), simProp.getValue());
                 }
-                for (Map.Entry<String, List<Object>> prop : 
properties.entrySet()) {
-                    builder.field(prop.getKey(), prop.getValue().size() == 1 ? 
prop.getValue().get(0) : prop.getValue());
+                for (Map.Entry<String, Set<Object>> prop : 
properties.entrySet()) {
+                    builder.field(prop.getKey(), prop.getValue().size() == 1 ? 
prop.getValue().iterator().next() : prop.getValue());
                 }
                 if (!similarityTags.isEmpty()) {
                     builder.field(ElasticIndexDefinition.SIMILARITY_TAGS, 
similarityTags);
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
index 27f1e3cb03..57fd31b002 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elastic;
 
+import co.elastic.clients.elasticsearch.core.GetRequest;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.InitialContent;
 import org.apache.jackrabbit.oak.Oak;
@@ -240,6 +242,19 @@ public abstract class ElasticAbstractQueryTest extends 
AbstractQueryTest {
                }
     }
 
+    protected ObjectNode getDocument(Tree index, String id) {
+        ElasticIndexDefinition esIdxDef = getElasticIndexDefinition(index);
+
+        GetRequest get = GetRequest.of(r -> r
+                .index(esIdxDef.getIndexAlias())
+                .id(id));
+        try {
+            return esConnection.getClient().get(get, 
ObjectNode.class).source();
+        } catch (ElasticsearchException | IOException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
     private ElasticIndexDefinition getElasticIndexDefinition(Tree index) {
         return new ElasticIndexDefinition(
                 nodeStore.getRoot(),
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java
index 60e757389d..52e0c96ad0 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elastic;
 
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.api.Type;
 import 
org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
@@ -258,4 +259,27 @@ public class ElasticContentTest extends 
ElasticAbstractQueryTest {
 
         assertEventually(() -> assertQuery("select [jcr:path] from [nt:base] 
where [a] = 'text'", results));
     }
+
+    @Test
+    public void deduplicateFields() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        builder.indexRule("nt:base").property("a").propertyIndex();
+        Tree index = setIndex(UUID.randomUUID().toString(), builder);
+        root.commit();
+
+        Tree content = root.getTree("/").addChild("content");
+        content.addChild("indexed1").setProperty("a", List.of("foo", "foo"), 
Type.STRINGS);
+        content.addChild("indexed2").setProperty("a", List.of("foo", "bar", 
"foo"), Type.STRINGS);
+        root.commit();
+
+        assertEventually(() -> {
+            ObjectNode indexed1 = getDocument(index, "/content/indexed1");
+            assertThat(indexed1.get("a").asText(), equalTo("foo"));
+
+            ObjectNode indexed2 = getDocument(index, "/content/indexed2");
+            assertThat(indexed2.get("a").size(), equalTo(2));
+            assertThat(indexed2.get("a").get(0).asText(), equalTo("foo"));
+            assertThat(indexed2.get("a").get(1).asText(), equalTo("bar"));
+        });
+    }
 }

Reply via email to