Author: thomasm Date: Wed Apr 22 14:52:46 2020 New Revision: 1876830 URL: http://svn.apache.org/viewvc?rev=1876830&view=rev Log: OAK-8971 Indexing: dynamic boost, as an alternative to IndexFieldProvider
Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/FulltextQueryTermsProviderImpl.java jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/IndexFieldProviderImpl.java Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/FulltextQueryTermsProvider.java jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/IndexFieldProvider.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1876830&r1=1876829&r2=1876830&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java Wed Apr 22 14:52:46 2020 @@ -20,6 +20,8 @@ package org.apache.jackrabbit.oak.plugins.index.lucene; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.jackrabbit.oak.api.Blob; @@ -40,7 +42,6 @@ import org.apache.lucene.facet.FacetsCon import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.BytesRef; -import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,6 +55,10 @@ import static org.apache.jackrabbit.oak. public class LuceneDocumentMaker extends FulltextDocumentMaker<Document> { private static final Logger log = LoggerFactory.getLogger(LuceneDocumentMaker.class); + private static final String DYNAMIC_BOOST_TAG_NAME = "name"; + private static final String DYNAMIC_BOOST_TAG_CONFIDENCE = "confidence"; + private static final String DYNAMIC_BOOST_SPLIT_REGEX = "[:/]"; + private final FacetsConfigProvider facetsConfigProvider; private final IndexAugmentorFactory augmentorFactory; @@ -331,4 +336,69 @@ public class LuceneDocumentMaker extends } } } + + @Override + protected boolean indexDynamicBoost(Document doc, PropertyDefinition pd, NodeState nodeState, String propertyName) { + NodeState propertNode = nodeState; + String parentName = PathUtils.getParentPath(propertyName); + for (String c : PathUtils.elements(parentName)) { + propertNode = propertNode.getChildNode(c); + } + boolean added = false; + for (String nodeName : propertNode.getChildNodeNames()) { + NodeState dynaTag = propertNode.getChildNode(nodeName); + String dynaTagName = dynaTag.getProperty(DYNAMIC_BOOST_TAG_NAME).getValue(Type.STRING); + Double dynaTagConfidence = dynaTag.getProperty(DYNAMIC_BOOST_TAG_CONFIDENCE).getValue(Type.DOUBLE); + + List<String> tokens = new ArrayList<>(splitForIndexing(dynaTagName)); + if (tokens.size() > 1) { + // Actual name not in tokens + tokens.add(dynaTagName); + } + boolean addedForThisChild = false; + for (String token : tokens) { + if (token.length() > 0) { + AugmentedField f = new AugmentedField(parentName + "/" + token.toLowerCase(), dynaTagConfidence); + if (doc.getField(f.name()) == null) { + addedForThisChild = true; + added = true; + doc.add(f); + } + } + } + if (addedForThisChild) { + log.trace( + "Added augmented fields: {}[{}], {}", + parentName + "/", String.join(", ", tokens), dynaTagConfidence + ); + } + } + return added; + } + + private static class AugmentedField extends Field { + private static final FieldType ft = new FieldType(); + static { + ft.setIndexed(true); + ft.setStored(false); + ft.setTokenized(false); + ft.setOmitNorms(false); + ft.setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions.DOCS_ONLY); + ft.freeze(); + } + + AugmentedField(String name, double weight) { + super(name, "1", ft); + setBoost((float) weight); + } + } + + private static List<String> splitForIndexing(String tagName) { + return Arrays.asList(removeBackSlashes(tagName).split(DYNAMIC_BOOST_SPLIT_REGEX)); + } + + private static String removeBackSlashes(String text) { + return text.replaceAll("\\\\", ""); + } + } \ No newline at end of file Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/FulltextQueryTermsProvider.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/FulltextQueryTermsProvider.java?rev=1876830&r1=1876829&r2=1876830&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/FulltextQueryTermsProvider.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/FulltextQueryTermsProvider.java Wed Apr 22 14:52:46 2020 @@ -24,9 +24,16 @@ import org.jetbrains.annotations.Nullabl import java.util.Collections; import java.util.Set; + /** - * Implementations of this interface would get callbacks while forming lucene full text queries. + * Implementations of this interface would get callbacks while forming lucene + * full text queries. + * + * @deprecated This interface exposes Lucene API directly - it will be removed + * as soon as an alternative is available. See also + * IndexFieldProvider. */ +@Deprecated public interface FulltextQueryTermsProvider { /** * Implementation which doesn't do anything useful... yet, abides with the contract. Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/IndexFieldProvider.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/IndexFieldProvider.java?rev=1876830&r1=1876829&r2=1876830&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/IndexFieldProvider.java (original) +++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/spi/IndexFieldProvider.java Wed Apr 22 14:52:46 2020 @@ -25,7 +25,11 @@ import java.util.Set; /** * Implementations of this interface would get callbacks while indexing documents. It's the responsibility * of the implementation to exit as early as possible if it doesn't care about the document being indexed. + * + * @deprecated This interface exposes Lucene API directly - it will be removed soon. + * Use the "dynamicBoost" feature instead. */ +@Deprecated public interface IndexFieldProvider { /** * Implementation which doesn't do anything useful... yet, abides with the contract. Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java?rev=1876830&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java (added) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java Wed Apr 22 14:52:46 2020 @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.jackrabbit.oak.plugins.index.lucene.dynamicBoost; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; + +import org.apache.jackrabbit.JcrConstants; +import org.apache.jackrabbit.oak.Oak; +import org.apache.jackrabbit.oak.api.CommitFailedException; +import org.apache.jackrabbit.oak.api.ContentRepository; +import org.apache.jackrabbit.oak.api.Tree; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.junit.LogCustomizer; +import org.apache.jackrabbit.oak.plugins.index.lucene.IndexAugmentorFactory; +import org.apache.jackrabbit.oak.plugins.index.lucene.IndexTracker; +import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneDocumentMaker; +import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants; +import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider; +import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider; +import org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil; +import org.apache.jackrabbit.oak.plugins.index.lucene.spi.IndexFieldProvider; +import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache; +import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants; +import org.apache.jackrabbit.oak.plugins.index.search.IndexFormatVersion; +import org.apache.jackrabbit.oak.plugins.nodetype.write.NodeTypeRegistry; +import org.apache.jackrabbit.oak.query.AbstractQueryTest; +import org.apache.jackrabbit.oak.spi.commit.Observer; +import org.apache.jackrabbit.oak.spi.mount.Mounts; +import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider; +import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider; +import org.junit.Test; + +import ch.qos.logback.classic.Level; + +/** + * Tests the index augmentation feature. + */ +public class DynamicBoostTest extends AbstractQueryTest { + + public static final String ASSET_NODE_TYPE = + "[dam:Asset]\n" + + " - * (UNDEFINED) multiple\n" + + " - * (UNDEFINED)\n" + + " + * (nt:base) = oak:TestNode VERSION"; + + private static final String UNSTRUCTURED = "nt:unstructured"; + + private final SimpleIndexAugmentorFactory factory = new SimpleIndexAugmentorFactory(); + + @Override + protected void createTestIndexNode() throws Exception { + setTraversalEnabled(false); + } + + @Override + protected ContentRepository createRepository() { + IndexTracker tracker = new IndexTracker(); + LuceneIndexEditorProvider editorProvider = new LuceneIndexEditorProvider(null, + new ExtractedTextCache(0, 0), + factory, Mounts.defaultMountInfoProvider()); + LuceneIndexProvider provider = new LuceneIndexProvider(tracker, + factory); + return new Oak() + .with(new OpenSecurityProvider()) + .with((QueryIndexProvider) provider) + .with((Observer) provider) + .with(editorProvider) + .createContentRepository(); + } + + @Test public void withFieldProvider() throws Exception { + NodeTypeRegistry.register(root, toInputStream(ASSET_NODE_TYPE), "test nodeType"); + createIndex("dam:Asset"); + root.commit(); + factory.indexFieldProvider = new IndexFieldProviderImpl(); + + String log = runTest(IndexFieldProviderImpl.class); + assertEquals( + "[" + + "Added augmented fields: jcr:content/metadata/predictedTags/[my, a, my:a], 10.0" + + "]", log); + } + + @Test public void withDynamicBoost() throws Exception { + NodeTypeRegistry.register(root, toInputStream(ASSET_NODE_TYPE), "test nodeType"); + Tree props = createIndex("dam:Asset"); + Tree pt = createNodeWithType(props, "predictedTags", UNSTRUCTURED); + pt.setProperty("name", "jcr:content/metadata/predictedTags/.*"); + pt.setProperty("isRegexp", true); + pt.setProperty("dynamicBoost", true); + pt.setProperty("propertyIndex", true); + root.commit(); + + String log = runTest(LuceneDocumentMaker.class); + assertEquals( + "[" + + "Added augmented fields: jcr:content/metadata/predictedTags/[my, a, my:a], 10.0, " + + "Added augmented fields: jcr:content/metadata/predictedTags/[my, a, my:a], 30.0" + + "]", log); + } + + private String runTest(Class<?> loggerClass) throws CommitFailedException { + LogCustomizer customLogs = LogCustomizer + .forLogger(loggerClass) + .enable(Level.TRACE).create(); + customLogs.starting(); + try { + Tree test = createNodeWithType(root.getTree("/"), "test", UNSTRUCTURED); + Tree node = createNodeWithType(test, "item", "dam:Asset"); + Tree predicted = + createNodeWithType( + createNodeWithType( + createNodeWithType(node, JcrConstants.JCR_CONTENT, UNSTRUCTURED), + "metadata", UNSTRUCTURED), + "predictedTags", UNSTRUCTURED); + Tree t = createNodeWithType(predicted, "a", UNSTRUCTURED); + t.setProperty("name", "my:a"); + t.setProperty("confidence", 10.0); + root.commit(); + // this is not detected + t.setProperty("confidence", 20); + root.commit(); + // now we change an indexed property: + // this is detected in the dynamicBoost case + t.getParent().setProperty("updateCount", 2); + t.setProperty("confidence", 30); + root.commit(); + return customLogs.getLogs().toString(); + } finally { + customLogs.finished(); + } + } + + private static Tree createNodeWithType(Tree t, String nodeName, String typeName){ + t = t.addChild(nodeName); + t.setProperty(JcrConstants.JCR_PRIMARYTYPE, typeName, Type.NAME); + return t; + } + + private Tree createIndex(String nodeType) throws Exception { + Tree rootTree = root.getTree("/"); + Tree index = createTestIndexNode(rootTree, LuceneIndexConstants.TYPE_LUCENE); + index.setProperty(FulltextIndexConstants.COMPAT_MODE, IndexFormatVersion.V2.getVersion()); + return TestUtil.newRulePropTree(index, nodeType); + } + + private static class SimpleIndexAugmentorFactory extends IndexAugmentorFactory { + IndexFieldProvider indexFieldProvider = IndexFieldProvider.DEFAULT; + + @Override + public IndexFieldProvider getIndexFieldProvider(String nodeType) { + return indexFieldProvider; + } + + } + + private static InputStream toInputStream(String x) { + return new ByteArrayInputStream(x.getBytes()); + } + +} Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/FulltextQueryTermsProviderImpl.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/FulltextQueryTermsProviderImpl.java?rev=1876830&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/FulltextQueryTermsProviderImpl.java (added) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/FulltextQueryTermsProviderImpl.java Wed Apr 22 14:52:46 2020 @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.lucene.dynamicBoost; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.jackrabbit.oak.plugins.index.lucene.spi.FulltextQueryTermsProvider; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.TermQuery; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An example fulltext query terms provider. + */ +public class FulltextQueryTermsProviderImpl implements FulltextQueryTermsProvider { + + private static final Logger LOG = LoggerFactory.getLogger(FulltextQueryTermsProviderImpl.class); + private static final String SEARCH_SPLIT_REGEX = "[ ]"; + private static final String NT_DAM_ASSET = "dam:Asset"; + private static final String PREDICTED_TAGS_REL_PATH = ""; + private static final int MAX_FRAGMENT_SIZE = 2; + private static final int MAX_QUERY_SIZE = 10; + + @Override + public Set<String> getSupportedTypes() { + Set<String> supportedTypes = new HashSet<String>(); + supportedTypes.add(NT_DAM_ASSET); + return supportedTypes; + } + + @Override + public org.apache.lucene.search.Query getQueryTerm(String text, Analyzer analyzer, NodeState indexDefinition) { + if (analyzer == null || text == null) { + return null; + } + + LOG.debug("getQueryTerm Text: {}", text); + BooleanQuery query = this.createQuery(); + + Set<String> charTerms = new HashSet<String>(splitForSearch(text)); + LOG.debug("getQueryTerm charTerms: {}", charTerms); + if(charTerms.size() > MAX_QUERY_SIZE) { + LOG.debug("Not adding query terms for smart tags as number of terms in the query {} exceeds " + + "maximum permissible value of {}", charTerms.size(), MAX_QUERY_SIZE); + return null; + } + List<String> fragments = prepareFragments(charTerms); + + for(String fragment : fragments) { + Term term = new Term(PREDICTED_TAGS_REL_PATH + fragment.toLowerCase(), "1"); + query.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + LOG.debug("Added query term: {}", fragment.toLowerCase()); + } + + + Term term = new Term(PREDICTED_TAGS_REL_PATH + text.toLowerCase(), "1"); + query.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + LOG.debug("Added query term: {}", text.toLowerCase()); + + //De-boosting smart tags based query. + query.setBoost(0.0001f); + return query; + + } + + private List<String> prepareFragments(Set<String> charTerms) { + + List<String> fragments = new ArrayList<String>(); + Set<Set<String>> powerSet = powerSet(charTerms); + + for(Set<String> set : powerSet) { + StringBuilder sb = null; + for(String s : set) { + if(sb == null) { + sb = new StringBuilder(); + } + sb.append(s); + if(sb.length() > 0) { + sb.append(' '); + } + } + if(sb != null) { + fragments.add(sb.toString().trim()); + } + } + + return fragments; + } + + private <T> Set<Set<T>> powerSet(Set<T> originalSet) { + Set<Set<T>> powerSet = new HashSet<Set<T>>(); + if (originalSet.isEmpty()) { + powerSet.add(new HashSet<T>()); + return powerSet; + } + List<T> list = new ArrayList<T>(originalSet); + T head = list.get(0); + Set<T> rest = new HashSet<T>(list.subList(1, list.size())); + for (Set<T> subsetExcludingHead : powerSet(rest)) { + Set<T> subsetIncludingHead = new HashSet<T>(); + subsetIncludingHead.add(head); + subsetIncludingHead.addAll(subsetExcludingHead); + if(subsetIncludingHead.size() <= MAX_FRAGMENT_SIZE) { + powerSet.add(subsetIncludingHead); + } + if(subsetExcludingHead.size() <= MAX_FRAGMENT_SIZE) { + powerSet.add(subsetExcludingHead); + } + } + return powerSet; + } + + private List<String> splitForSearch(String tagName) { + return Arrays.asList(removeBackSlashes(tagName).split(SEARCH_SPLIT_REGEX)); + } + + private String removeBackSlashes(String text) { + return text.replaceAll("\\\\", ""); + } + + protected BooleanQuery createQuery() { + return new BooleanQuery(); + } + +} Added: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/IndexFieldProviderImpl.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/IndexFieldProviderImpl.java?rev=1876830&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/IndexFieldProviderImpl.java (added) +++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/IndexFieldProviderImpl.java Wed Apr 22 14:52:46 2020 @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.lucene.dynamicBoost; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.jackrabbit.JcrConstants; +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.plugins.index.lucene.spi.IndexFieldProvider; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static com.google.common.collect.Sets.newHashSet; + +/** + * An example index field provider. + */ +public class IndexFieldProviderImpl implements IndexFieldProvider { + + private static final Logger LOG = LoggerFactory.getLogger(IndexFieldProviderImpl.class); + + private static final String METADATA_FOLDER = "metadata"; + private static final String PREDICTED_TAGS = "predictedTags"; + private static final String PREDICTED_TAG_NAME = "name"; + private static final String PREDICTED_TAG_CONFIDENCE = "confidence"; + private static final String PREDICTED_TAGS_REL_PATH = JcrConstants.JCR_CONTENT + "/" + METADATA_FOLDER + "/" + + PREDICTED_TAGS + "/"; + private static final String INDEXING_SPLIT_REGEX = "[:/]"; + private static final String NT_DAM_ASSET = "dam:Asset"; + + @Override + public Set<String> getSupportedTypes() { + Set<String> supportedTypes = new HashSet<String>(); + supportedTypes.add(NT_DAM_ASSET); + return supportedTypes; + } + + @Override + public @NotNull Iterable<Field> getAugmentedFields(String path, NodeState nodeState, NodeState indexDefinition) { + Set<Field> fields = newHashSet(); + NodeState dynaTags = nodeState.getChildNode(JcrConstants.JCR_CONTENT).getChildNode(METADATA_FOLDER).getChildNode(PREDICTED_TAGS); + for (String nodeName : dynaTags.getChildNodeNames()) { + NodeState dynaTag = dynaTags.getChildNode(nodeName); + String dynaTagName = dynaTag.getProperty(PREDICTED_TAG_NAME).getValue(Type.STRING); + Double dynaTagConfidence = dynaTag.getProperty(PREDICTED_TAG_CONFIDENCE).getValue(Type.DOUBLE); + + List<String> tokens = new ArrayList<>(splitForIndexing(dynaTagName)); + if (tokens.size() > 1) { // Actual name not in tokens + tokens.add(dynaTagName); + } + for (String token : tokens) { + if (token.length() > 0) { + fields.add(new AugmentedField(PREDICTED_TAGS_REL_PATH + token.toLowerCase(), dynaTagConfidence)); + } + } + LOG.trace( + "Added augmented fields: {}[{}], {}", + PREDICTED_TAGS_REL_PATH, String.join(", ", tokens), dynaTagConfidence + ); + } + return fields; + } + + private static class AugmentedField extends Field { + private static final FieldType ft = new FieldType(); + static { + ft.setIndexed(true); + ft.setStored(false); + ft.setTokenized(false); + ft.setOmitNorms(false); + ft.setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions.DOCS_ONLY); + ft.freeze(); + } + + AugmentedField(String name, double weight) { + super(name, "1", ft); + setBoost((float) weight); + } + } + + private static List<String> splitForIndexing(String tagName) { + return Arrays.asList(removeBackSlashes(tagName).split(INDEXING_SPLIT_REGEX)); + } + + private static String removeBackSlashes(String text) { + return text.replaceAll("\\\\", ""); + } + +} \ No newline at end of file Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java?rev=1876830&r1=1876829&r2=1876830&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java Wed Apr 22 14:52:46 2020 @@ -186,4 +186,11 @@ class ElasticsearchDocumentMaker extends // TODO : not implemented return false; } + + @Override + protected boolean indexDynamicBoost(ElasticsearchDocument doc, PropertyDefinition pd, NodeState nodeState, + String propertyName) { + // TODO : not implemented + return false; + } } Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java?rev=1876830&r1=1876829&r2=1876830&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java (original) +++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java Wed Apr 22 14:52:46 2020 @@ -138,6 +138,8 @@ public interface FulltextIndexConstants String PROP_WEIGHT = "weight"; + String PROP_DYNAMIC_BOOST = "dynamicBoost"; + /** * Boolean property in property definition to mark sync properties */ Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java?rev=1876830&r1=1876829&r2=1876830&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java (original) +++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java Wed Apr 22 14:52:46 2020 @@ -102,6 +102,8 @@ public class PropertyDefinition { public final int weight; + public final boolean dynamicBoost; + /** * Property name excluding the relativePath. For regular expression based definition * its set to null @@ -136,6 +138,7 @@ public class PropertyDefinition { this.relative = isRelativeProperty(name); this.boost = getOptionalValue(defn, FIELD_BOOST, DEFAULT_BOOST); this.weight = getOptionalValue(defn, PROP_WEIGHT, DEFAULT_PROPERTY_WEIGHT); + this.dynamicBoost = getOptionalValue(defn, FulltextIndexConstants.PROP_DYNAMIC_BOOST, false); //By default if a property is defined it is indexed this.index = getOptionalValue(defn, FulltextIndexConstants.PROP_INDEX, true); Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1876830&r1=1876829&r2=1876830&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original) +++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Wed Apr 22 14:52:46 2020 @@ -101,6 +101,8 @@ public abstract class FulltextDocumentMa protected abstract void indexTypedProperty(D doc, PropertyState property, String pname, PropertyDefinition pd, int index); + protected abstract boolean indexDynamicBoost(D doc, PropertyDefinition pd, NodeState nodeState, String propertyName); + protected abstract void indexAncestors(D doc, String path); protected abstract void indexNotNullProperty(D doc, PropertyDefinition pd); @@ -244,6 +246,9 @@ public abstract class FulltextDocumentMa if (pd.propertyIndex && pd.includePropertyType(property.getType().tag())) { dirty |= addTypedFields(doc, property, pname, pd); } + if (pd.dynamicBoost) { + dirty |= indexDynamicBoost(doc, pd, state, pname); + } if (pd.fulltextEnabled() && includeTypeForFullText) { for (String value : property.getValue(Type.STRINGS)) {