This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch OAK-12097
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/OAK-12097 by this push:
new 4720715751 feat: loadTokenizer uses default tokenizer when none provided (#2767)
4720715751 is described below
commit 4720715751598f6bbcfb1b537a41dc3959b7631c
Author: Anton Hosgood <[email protected]>
AuthorDate: Fri Feb 27 14:09:22 2026 +0100
feat: loadTokenizer uses default tokenizer when none provided (#2767)
Co-authored-by: Anton Hosgood <[email protected]>
---
.../oak/plugins/index/lucene/NodeStateAnalyzerFactory.java | 10 ++++++----
.../plugins/index/lucene/NodeStateAnalyzerFactoryTest.java | 13 +++++++++++++
2 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/NodeStateAnalyzerFactory.java b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/NodeStateAnalyzerFactory.java
index ee08fc1718..14c067d06d 100644
--- a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/NodeStateAnalyzerFactory.java
+++ b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/NodeStateAnalyzerFactory.java
@@ -58,8 +58,6 @@ import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static java.util.Objects.requireNonNull;
-
/**
* Constructs a Lucene Analyzer from nodes (based on NodeState content).
*
@@ -141,9 +139,13 @@ final class NodeStateAnalyzerFactory {
}
private TokenizerFactory loadTokenizer(NodeState state) {
- String clazz =
requireNonNull(state.getString(FulltextIndexConstants.ANL_NAME));
+ String name = state.getString(FulltextIndexConstants.ANL_NAME);
+ if (name == null) {
+ log.warn("No tokenizer name specified in analyzer configuration.
Using default 'standard' tokenizer.");
+ name = "standard";
+ }
Map<String, String> args = convertNodeState(state);
- TokenizerFactory tf = TokenizerFactory.forName(clazz, args);
+ TokenizerFactory tf = TokenizerFactory.forName(name, args);
init(tf, state);
return tf;
}
diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/NodeStateAnalyzerFactoryTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/NodeStateAnalyzerFactoryTest.java
index e9a4f1365c..78909e98e6 100644
--- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/NodeStateAnalyzerFactoryTest.java
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/NodeStateAnalyzerFactoryTest.java
@@ -38,6 +38,7 @@ import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
import org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory;
+import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
@@ -191,6 +192,18 @@ public class NodeStateAnalyzerFactoryTest {
assertNull(result.get(":hiddenProp"));
}
+ @Test
+ public void missingTokenizerName() throws Exception{
+ NodeBuilder nb = EMPTY_NODE.builder();
+ nb.child(ANL_TOKENIZER);
+
+ TokenizerChain analyzer = (TokenizerChain)
factory.createInstance(nb.getNodeState());
+ assertNotNull(analyzer);
+ assertNotNull(analyzer.getTokenizer());
+
+ assertEquals(StandardTokenizerFactory.class.getName(),
analyzer.getTokenizer().getClassArg());
+ }
+
private static NodeBuilder createFileNode(NodeBuilder nb, String nodeName,
byte[] content){
return nb.child(nodeName).child(JCR_CONTENT).setProperty(JCR_DATA,
content);
}