This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-11568
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 9635838aa471fb4cad5b2e2bd8963dbb62ad4a66
Author: Thomas Mueller <[email protected]>
AuthorDate: Wed Mar 19 16:22:28 2025 +0100

    OAK-11568 Elastic: improved compatibility for aggregation definitions
---
 .../index/elastic/ElasticIndexProviderService.java | 69 +---------------------
 .../index/elastic/index/ElasticCustomAnalyzer.java | 63 +++++++++++++++++---
 .../index/elastic/index/ElasticDocument.java       |  2 +
 .../elastic/index/ElasticIndexEditorContext.java   |  2 +-
 .../index/elastic/ElasticInferenceTest.java        |  5 +-
 .../oak/plugins/index/elastic/ElasticPerfTest.java |  5 +-
 .../plugins/index/elastic/ElasticTestServer.java   |  1 +
 .../elastic/index/ElasticIndexHelperTest.java      | 33 +++++++++++
 .../oak/plugins/index/search/util/ConfigUtil.java  |  2 +-
 9 files changed, 99 insertions(+), 83 deletions(-)

diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexProviderService.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexProviderService.java
index f2aebf1670..b00b680303 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexProviderService.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexProviderService.java
@@ -16,9 +16,6 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elastic;
 
-import org.apache.commons.io.FilenameUtils;
-import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
-import org.apache.jackrabbit.oak.cache.CacheStats;
 import org.apache.jackrabbit.oak.commons.IOUtils;
 import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
 import org.apache.jackrabbit.oak.plugins.index.AsyncIndexInfoService;
@@ -50,13 +47,11 @@ import 
org.osgi.service.metatype.annotations.ObjectClassDefinition;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
 import java.util.ArrayList;
 import java.util.Dictionary;
 import java.util.Hashtable;
 import java.util.List;
 
-import static org.apache.commons.io.FileUtils.ONE_MB;
 import static 
org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean;
 import static 
org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.scheduleWithFixedDelay;
 
@@ -130,8 +125,6 @@ public class ElasticIndexProviderService {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(ElasticIndexProviderService.class);
 
-    private static final String REPOSITORY_HOME = "repository.home";
-
     @Reference
     private StatisticsProvider statisticsProvider;
 
@@ -149,11 +142,10 @@ public class ElasticIndexProviderService {
 
     private ExtractedTextCache extractedTextCache;
 
-    private final List<ServiceRegistration> regs = new ArrayList<>();
+    private final List<ServiceRegistration<?>> regs = new ArrayList<>();
     private final List<Registration> oakRegs = new ArrayList<>();
 
     private Whiteboard whiteboard;
-    private File textExtractionDir;
 
     private ElasticConnection elasticConnection;
     private ElasticMetricHandler metricHandler;
@@ -200,7 +192,7 @@ public class ElasticIndexProviderService {
 
     @Deactivate
     private void deactivate() {
-        for (ServiceRegistration reg : regs) {
+        for (ServiceRegistration<?> reg : regs) {
             reg.unregister();
         }
 
@@ -242,63 +234,6 @@ public class ElasticIndexProviderService {
         Dictionary<String, Object> props = new Hashtable<>();
         props.put("type", ElasticIndexDefinition.TYPE_ELASTICSEARCH);
         
regs.add(bundleContext.registerService(IndexEditorProvider.class.getName(), 
editorProvider, props));
-//        oakRegs.add(registerMBean(whiteboard,
-//                TextExtractionStatsMBean.class,
-//                editorProvider.getExtractedTextCache().getStatsMBean(),
-//                TextExtractionStatsMBean.TYPE,
-//                "TextExtraction statistics"));
-    }
-
-    private void initializeExtractedTextCache(final Config config, 
StatisticsProvider statisticsProvider) {
-
-        extractedTextCache = new ExtractedTextCache(
-                config.extractedTextCacheSizeInMB() * ONE_MB,
-                config.extractedTextCacheExpiryInSecs(),
-                config.alwaysUsePreExtractedCache(),
-                textExtractionDir,
-                statisticsProvider);
-        if (extractedTextProvider != null) {
-            registerExtractedTextProvider(extractedTextProvider);
-        }
-        CacheStats stats = extractedTextCache.getCacheStats();
-        if (stats != null) {
-            oakRegs.add(registerMBean(whiteboard,
-                    CacheStatsMBean.class, stats,
-                    CacheStatsMBean.TYPE, stats.getName()));
-            LOG.info("Extracted text caching enabled with maxSize {} MB, 
expiry time {} secs",
-                    config.extractedTextCacheSizeInMB(), 
config.extractedTextCacheExpiryInSecs());
-        }
-    }
-
-    private void initializeTextExtractionDir(BundleContext bundleContext, 
Config config) {
-        String textExtractionDir = config.localTextExtractionDir();
-        if (textExtractionDir.trim().isEmpty()) {
-            String repoHome = bundleContext.getProperty(REPOSITORY_HOME);
-            if (repoHome != null) {
-                textExtractionDir = FilenameUtils.concat(repoHome, "index");
-            }
-        }
-
-        if (textExtractionDir == null) {
-            throw new IllegalStateException(String.format("Text extraction 
directory cannot be determined as neither " +
-                    "directory path [%s] nor repository home [%s] defined", 
PROP_LOCAL_TEXT_EXTRACTION_DIR, REPOSITORY_HOME));
-        }
-
-        this.textExtractionDir = new File(textExtractionDir);
-    }
-
-    private void registerExtractedTextProvider(PreExtractedTextProvider 
provider) {
-        if (extractedTextCache != null) {
-            if (provider != null) {
-                String usage = 
extractedTextCache.isAlwaysUsePreExtractedCache() ?
-                        "always" : "only during reindexing phase";
-                LOG.info("Registering PreExtractedTextProvider {} with 
extracted text cache. " +
-                        "It would be used {}", provider, usage);
-            } else {
-                LOG.info("Unregistering PreExtractedTextProvider with 
extracted text cache");
-            }
-            extractedTextCache.setExtractedTextProvider(provider);
-        }
     }
 
     private ElasticConnection getElasticConnection(Config contextConfig) {
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java
index 05026f9e20..84d252a04f 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java
@@ -55,6 +55,7 @@ import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -97,7 +98,13 @@ public class ElasticCustomAnalyzer {
             NodeState defaultAnalyzer = 
state.getChildNode(FulltextIndexConstants.ANL_DEFAULT);
             if (defaultAnalyzer.exists()) {
                 IndexSettingsAnalysis.Builder builder = new 
IndexSettingsAnalysis.Builder();
-                Map<String, Object> analyzer = 
convertNodeState(defaultAnalyzer);
+                Map<String, Object> analyzer;
+                try {
+                    analyzer = convertNodeState(defaultAnalyzer);
+                } catch (IOException e) {
+                    LOG.warn("Can not load analyzer; using an empty 
configuration", e);
+                    analyzer = Map.of();
+                }
                 String builtIn = 
defaultAnalyzer.getString(FulltextIndexConstants.ANL_CLASS);
                 if (builtIn == null) {
                     builtIn = 
defaultAnalyzer.getString(FulltextIndexConstants.ANL_NAME);
@@ -107,11 +114,14 @@ public class ElasticCustomAnalyzer {
 
                     // content params, usually stop words
                     for (ChildNodeEntry nodeEntry : 
defaultAnalyzer.getChildNodeEntries()) {
+                        List<String> list;
                         try {
-                            analyzer.put(normalize(nodeEntry.getName()), 
loadContent(nodeEntry.getNodeState(), nodeEntry.getName(), 
NOOP_TRANSFORMATION));
+                            list = loadContent(nodeEntry.getNodeState(), 
nodeEntry.getName(), NOOP_TRANSFORMATION);
                         } catch (IOException e) {
-                            throw new IllegalStateException("Unable to load 
content for node entry " + nodeEntry.getName(), e);
+                            LOG.warn("Unable to load analyzer content for 
entry '" + nodeEntry.getName() + "'; using empty list", e);
+                            list = List.of();
                         }
+                        analyzer.put(normalize(nodeEntry.getName()), list);
                     }
 
                     builder.analyzer(analyzerName, new Analyzer(null, 
JsonData.of(analyzer)));
@@ -145,8 +155,22 @@ public class ElasticCustomAnalyzer {
 
     @NotNull
     private static TokenizerDefinition loadTokenizer(NodeState state) {
-        String name = 
normalize(Objects.requireNonNull(state.getString(FulltextIndexConstants.ANL_NAME)));
-        Map<String, Object> args = convertNodeState(state);
+        String name;
+        Map<String, Object> args;
+        if (!state.exists()) {
+            LOG.warn("No tokenizer specified; the standard with an empty 
configuration");
+            name = "Standard";
+            args = new HashMap<String, Object>();
+        } else {
+            name = 
Objects.requireNonNull(state.getString(FulltextIndexConstants.ANL_NAME));
+            try {
+                args = convertNodeState(state);
+            } catch (IOException e) {
+                LOG.warn("Can not load tokenizer; using an empty 
configuration", e);
+                args = new HashMap<String, Object>();
+            }
+        }
+        name = normalize(name);
         args.put(ANALYZER_TYPE, name);
         return new TokenizerDefinition(name, JsonData.of(args));
     }
@@ -228,7 +252,12 @@ public class ElasticCustomAnalyzer {
     }
 
     private static List<String> loadContent(NodeState file, String name, 
ContentTransformer transformer) throws IOException {
-        Blob blob = ConfigUtil.getBlob(file, name);
+        Blob blob;
+        try {
+            blob = ConfigUtil.getBlob(file, name);
+        } catch (IllegalArgumentException | IllegalStateException e) {
+            throw new IOException("Could not load " + name, e);
+        }
         try (Reader content = new 
InputStreamReader(Objects.requireNonNull(blob).getNewStream(), 
StandardCharsets.UTF_8)) {
             try (BufferedReader br = new BufferedReader(content)) {
                 return br.lines()
@@ -264,11 +293,25 @@ public class ElasticCustomAnalyzer {
         return name;
     }
 
-    private static Map<String, Object> convertNodeState(NodeState state) {
-        return convertNodeState(state, List.of(), List.of());
+    private static Map<String, Object> convertNodeState(NodeState state) 
throws IOException {
+        try {
+            return convertNodeState(state, List.of(), List.of());
+        } catch (IllegalStateException e) {
+            // convert runtime exception back to checked exception
+            throw new IOException("Can not convert", e);
+        }
     }
 
-    private static Map<String, Object> convertNodeState(NodeState state, 
List<ParameterTransformer> transformers, List<String> preloadedContent) {
+    /**
+     * Read analyzer configuration.
+     *
+     * @param state the node state
+     * @param transformers
+     * @param preloadedContent
+     * @return
+     * @throws IllegalStateException if referenced content cannot be loaded
+     */
+    private static Map<String, Object> convertNodeState(NodeState state, 
List<ParameterTransformer> transformers, List<String> preloadedContent) throws 
IllegalStateException {
         Map<String, Object> luceneParams = 
StreamSupport.stream(Spliterators.spliteratorUnknownSize(state.getProperties().iterator(),
 Spliterator.ORDERED), false)
                 .filter(ElasticCustomAnalyzer::isPropertySupported)
                 .collect(Collectors.toMap(PropertyState::getName, ps -> {
@@ -280,6 +323,8 @@ public class ElasticCustomAnalyzer {
                                 return 
loadContent(state.getChildNode(v.trim()), v.trim(),
                                         
CONTENT_TRANSFORMERS.getOrDefault(ps.getName(), NOOP_TRANSFORMATION)).stream();
                             } catch (IOException e) {
+                                // convert the checked exception to a runtime 
exception,
+                                // because the stream API doesn't support 
checked exceptions
                                 throw new IllegalStateException(e);
                             }
                         }).collect(Collectors.toList()));
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
index 2f1ee7e26e..a7918ec83f 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java
@@ -97,6 +97,7 @@ public class ElasticDocument {
                         map -> {
                             Object existingValue = 
map.get(ElasticIndexHelper.DYNAMIC_PROPERTY_VALUE);
                             if (existingValue instanceof Set) {
+                                @SuppressWarnings("unchecked")
                                 Set<Object> existingSet = (Set<Object>) 
existingValue;
                                 existingSet.add(value);
                             } else {
@@ -134,6 +135,7 @@ public class ElasticDocument {
         if (existingValue == null) {
             finalValue = value;
         } else if (existingValue instanceof Set) {
+            @SuppressWarnings("unchecked")
             Set<Object> existingSet = (Set<Object>) existingValue;
             existingSet.add(value);
             finalValue = existingSet;
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexEditorContext.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexEditorContext.java
index 330ebe416c..4e7f68cf95 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexEditorContext.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexEditorContext.java
@@ -40,7 +40,7 @@ class ElasticIndexEditorContext extends 
FulltextIndexEditorContext<ElasticDocume
     }
 
     @Override
-    public IndexDefinition.Builder newDefinitionBuilder() {
+    public ElasticIndexDefinition.Builder newDefinitionBuilder() {
         return new ElasticIndexDefinition.Builder(((ElasticIndexDefinition) 
definition).getIndexPrefix());
     }
 
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticInferenceTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticInferenceTest.java
index 22a334f344..d0c459f0c5 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticInferenceTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticInferenceTest.java
@@ -145,12 +145,13 @@ public class ElasticInferenceTest extends 
ElasticAbstractQueryTest {
         for (String path : paths) {
             URL json = this.getClass().getResource("/inference" + path + 
".json");
             if (json != null) {
-                Map<String, Object> map = mapper.readValue(json, Map.class);
+                @SuppressWarnings("unchecked")
+                Map<String, Collection<Double>> map = mapper.readValue(json, 
Map.class);
                 ObjectNode updateDoc = mapper.createObjectNode();
                 ObjectNode inferenceNode = 
updateDoc.putObject(ElasticIndexDefinition.INFERENCE);
                 ArrayNode embeddingsNode = 
inferenceNode.putObject("embeddings").putArray("value");
                 inferenceNode.putObject("metadata").put("updatedAt", 
Instant.now().toEpochMilli());
-                for (Double d : (Collection<Double>) map.get("embedding")) {
+                for (Double d : map.get("embedding")) {
                     embeddingsNode.add(d);
                 }
                 updateDocument(index, path, updateDoc);
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPerfTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPerfTest.java
index c81412f755..77ef7ce225 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPerfTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPerfTest.java
@@ -167,12 +167,11 @@ public class ElasticPerfTest extends 
ElasticAbstractQueryTest {
 
     private void testQuery(String query, String language) throws Exception {
         Result result = executeQuery(query, language, NO_BINDINGS);
-        Iterable<ResultRow> it = (Iterable<ResultRow>) result.getRows();
-        Iterator<ResultRow> iterator = it.iterator();
+        Iterator<? extends ResultRow> iterator = result.getRows().iterator();
         long start = LOG_PERF.startForInfoLog("Getting result rows");
         int i = 0;
         while (iterator.hasNext()) {
-            ResultRow row = iterator.next();
+            iterator.next();
             i++;
         }
         LOG_PERF.end(start, -1,-1, "{} Results fetched", i);
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticTestServer.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticTestServer.java
index 4efcc665e5..cf694cb673 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticTestServer.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticTestServer.java
@@ -62,6 +62,7 @@ public class ElasticTestServer implements AutoCloseable {
         return CONTAINER;
     }
 
+    @SuppressWarnings("resource")
     private synchronized void setup() {
         String esDockerImageVersion = ELASTIC_DOCKER_IMAGE_VERSION != null ? 
ELASTIC_DOCKER_IMAGE_VERSION : Version.VERSION.toString();
         LOG.info("Elasticsearch test Docker image version: {}.", 
esDockerImageVersion);
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
index 9b7372967e..a32c90dca0 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
@@ -95,6 +95,39 @@ public class ElasticIndexHelperTest {
         ElasticIndexHelper.createIndexRequest("prefix.path", definition);
     }
 
+    @Test
+    public void analyzerWithEmptyTokenizer() {
+        IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
+        IndexDefinitionBuilder.IndexRule indexRule = 
builder.indexRule("idxRule");
+        indexRule.property("foo").type("String").useInSimilarity();
+
+        Tree analyzer = builder.getBuilderTree().addChild("analyzers");
+        Tree defaultAnalyzer = analyzer.addChild("default");
+        defaultAnalyzer.setProperty(FulltextIndexConstants.ANL_CLASS, 
"org.apache.lucene.analysis.en.EnglishAnalyzer");
+        defaultAnalyzer.addChild("tokenizer");
+        defaultAnalyzer.addChild("filter");
+
+        NodeState nodeState = builder.build();
+        ElasticIndexDefinition definition =
+                new ElasticIndexDefinition(nodeState, nodeState, "path", 
"prefix");
+        ElasticIndexHelper.createIndexRequest("prefix.path", definition);
+    }
+
+    @Test
+    public void analyzerWithEmptyDefault() {
+        IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
+        IndexDefinitionBuilder.IndexRule indexRule = 
builder.indexRule("idxRule");
+        indexRule.property("foo").type("String").useInSimilarity();
+
+        Tree analyzer = builder.getBuilderTree().addChild("analyzers");
+        analyzer.addChild("default");
+
+        NodeState nodeState = builder.build();
+        ElasticIndexDefinition definition =
+                new ElasticIndexDefinition(nodeState, nodeState, "path", 
"prefix");
+        ElasticIndexHelper.createIndexRequest("prefix.path", definition);
+    }
+
     @Test()
     public void indexSettingsAreCorrectlySet() {
         IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
diff --git 
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/ConfigUtil.java
 
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/ConfigUtil.java
index 9b5c3d0491..517e055d7b 100644
--- 
a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/ConfigUtil.java
+++ 
b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/ConfigUtil.java
@@ -110,7 +110,7 @@ public class ConfigUtil {
      * the jcr:content/@jcr:data property to get the binary content
      */
     @Nullable
-    public static Blob getBlob(NodeState state, String resourceName){
+    public static Blob getBlob(NodeState state, String resourceName) {
         NodeState contentNode = state.getChildNode(JcrConstants.JCR_CONTENT);
         checkArgument(contentNode.exists(), "Was expecting to find jcr:content 
node to read resource %s", resourceName);
         PropertyState property = 
contentNode.getProperty(JcrConstants.JCR_DATA);

Reply via email to