This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 41e615aeae OAK-11530 Elasticsearch: 'field name cannot be an empty string' (#2121)
41e615aeae is described below
commit 41e615aeae38e74a817406163e2904bcb5ccf0ef
Author: Thomas Mueller <[email protected]>
AuthorDate: Fri Feb 28 10:44:24 2025 +0100
OAK-11530 Elasticsearch: 'field name cannot be an empty string' (#2121)
* OAK-11530 Elasticsearch: 'field name cannot be an empty string' if fields start with a dot
* OAK-11530 Elasticsearch: 'field name cannot be an empty string' if fields start with a dot
* OAK-11530 Elasticsearch: 'field name cannot be an empty string' if fields start with a dot
* OAK-11530 Elasticsearch: 'field name cannot be an empty string' if fields start with a dot
* OAK-11530 Elasticsearch: 'field name cannot be an empty string' if fields start with a dot
* OAK-11530 Elasticsearch: 'field name cannot be an empty string' if fields start with a dot
---
oak-doc/src/site/markdown/query/lucene.md | 46 ++++++++++++----------
.../invalidData/InvalidIndexDefinitionTest.java | 29 +++++++++++++-
.../index/elastic/ElasticPropertyDefinition.java | 16 ++++++++
.../index/elastic/index/ElasticIndexWriter.java | 24 +++++++++++
.../index/elastic/ElasticPropertyIndexTest.java | 25 +++++++++++-
.../elastic/index/ElasticIndexWriterTest.java | 14 +++++++
.../plugins/index/search/PropertyDefinition.java | 5 +--
7 files changed, 133 insertions(+), 26 deletions(-)
diff --git a/oak-doc/src/site/markdown/query/lucene.md b/oak-doc/src/site/markdown/query/lucene.md
index 1952b1f3f0..5cec090b16 100644
--- a/oak-doc/src/site/markdown/query/lucene.md
+++ b/oak-doc/src/site/markdown/query/lucene.md
@@ -390,14 +390,7 @@ defined at the property definition level
name
: Property name. If not defined, then the property name is set to the node name.
- Can also be set to a relative property, e.g., `jcr:content/metadata/color`.
- For relative properties, one wildcard (`*`) is supported instead of a node name:
- `*/color` aggregates the values of the property `color` of all direct child nodes.
-
- If `isRegexp` is true, then the property name is a regular expression.
-
- Special properties such as "jcr:path", "jcr:score" can not be indexed.
- The path can be indexes using a function-based index in recent versions of Oak.
+ See [Property Names](#property-names).
isRegexp
: If set to true, then the property name is interpreted as a regular
@@ -560,25 +553,38 @@ unique
: Requires "sync=true". Enforces unique property values in the content.
: See [Hybrid Indexes][hybrid-index] for details.
-<a name="property-names"></a>**Property Names**
+##### <a name="property-names"></a>**Property Names**
-Property name can be one of following
+Property `name` can be one of the following:
-1. Simple name - Like `assetType` etc. These are used for properties which are
- defined directly on the indexed node
-2. Relative name - Like `jcr:content/metadata/title`. These are used for
+1. Simple name - like `assetType` etc. These are used for properties which are
+ defined directly on the indexed node.
+2. Relative name - like `jcr:content/metadata/title`. These are used for
properties which are defined relative to the node being indexed.
-3. Regular Expression - Like `.*`. Used when only property whose name
- match given pattern are to be indexed.
- They can also be used for relative properties like
- `jcr:content/metadata/dc:.*$`
+ For relative properties, one wildcard (`*`) is supported instead of a node name:
+ `*/color` aggregates the values of the property `color` of all direct child nodes.
+3. Regular Expression -
+ if `isRegexp` is true, then the property name is a regular expression, for example `.*`.
+ In this case, the properties whose name match the given pattern are indexed.
+ The value can refer to relative properties like `jcr:content/metadata/dc:.*$`,
which indexes all property names starting with `dc` from node with
- relative path `jcr:content/metadata`
+ relative path `jcr:content/metadata`.
4. The string `:nodeName` - this special case indexes node name as if it's a
virtual property of the node being indexed. Setting this along with
- `nodeScopeIndex=true` is akin to setting `indexNodeName=true` on indexing
+ `nodeScopeIndex = true` is akin to setting `indexNodeName = true` on indexing
rule (`@since Oak 1.3.15, 1.2.14`).
- Ordering is not supported. For ordering, use `function=name()` instead.
+ Ordering is not supported.
+ For ordering, use `function = "name()"` instead.
+
+Limitations:
+
+* Special properties such as `jcr:path`, `jcr:score` can not be indexed.
+ To index the path, use [function based indexing](#function-based-indexing): `function = "path()"`.
+* Properties where the `name` value starts with a dot
+ (eg. `./jcr:content/metadata/title`) are silently _ignored_,
+ for backward compatibility.
+ That means the property is not indexed, and when querying,
+ the index will ignore conditions on this field.
##### <a name="path-restrictions"></a> Evaluate Path Restrictions
diff --git a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/invalidData/InvalidIndexDefinitionTest.java b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/invalidData/InvalidIndexDefinitionTest.java
index bfac846b34..2fc3c9bf44 100644
--- a/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/invalidData/InvalidIndexDefinitionTest.java
+++ b/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/invalidData/InvalidIndexDefinitionTest.java
@@ -213,8 +213,33 @@ public class InvalidIndexDefinitionTest extends AbstractQueryTest {
String query = "select [jcr:path] from [nt:base] where isdescendantnode('/tmp') and upper([./test]) = 'HELLO'";
assertThat(explain(query), containsString("traverse"));
assertQuery(query, List.of("/tmp/testNode"));
- }
-
+ }
+
+ @Test
+ public void invalidProperty() throws CommitFailedException {
+ Tree def = createIndexNodeAndData();
+ Tree indexRules = def.getChild(LuceneIndexConstants.INDEX_RULES);
+ Tree ntBase = indexRules.getChild("nt:base");
+ Tree properties = ntBase.getChild(FulltextIndexConstants.PROP_NODE);
+ Tree foo = properties.addChild("foo");
+ foo.setProperty(FulltextIndexConstants.PROP_NAME, "foo");
+ foo.setProperty(FulltextIndexConstants.PROP_PROPERTY_INDEX, true);
+ Tree bar = properties.addChild("bar");
+ // errors here are ignored - just the index is not used then
+ // ("./bar" is not a supported syntax)
+ bar.setProperty(FulltextIndexConstants.PROP_NAME, "./bar");
+ bar.setProperty(FulltextIndexConstants.PROP_PROPERTY_INDEX, true);
+ root.commit();
+ String query = "select [jcr:path] from [nt:base] where isdescendantnode('/tmp') and [foo] = 'hello'";
+ assertThat(explain(query), containsString("lucene:test"));
+ query = "select [jcr:path] from [nt:base] where isdescendantnode('/tmp') and [./foo] = 'hello'";
+ assertThat(explain(query), containsString("lucene:test"));
+ query = "select [jcr:path] from [nt:base] where isdescendantnode('/tmp') and [bar] = 'hello'";
+ assertThat(explain(query), containsString("traverse"));
+ query = "select [jcr:path] from [nt:base] where isdescendantnode('/tmp') and [./bar] = 'hello'";
+ assertThat(explain(query), containsString("traverse"));
+ }
+
Tree createIndexNodeAndData() throws CommitFailedException {
Tree tmp = root.getTree("/").addChild("tmp");
tmp.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
index b6298903bc..0a4f275cdd 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
@@ -18,6 +18,9 @@ package org.apache.jackrabbit.oak.plugins.index.elastic;
import static org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValue;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.apache.jackrabbit.oak.spi.state.NodeState;
@@ -52,6 +55,19 @@ public class ElasticPropertyDefinition extends PropertyDefinition {
this.useInFullTextQuery = this.dynamicBoost && getOptionalValue(defn, PROP_USE_IN_FULL_TEXT_QUERY, true);
}
+ @Override
+ protected String getNamePropertyValue(NodeState definition, String defaultName) {
+ PropertyState ps = definition.getProperty(FulltextIndexConstants.PROP_NAME);
+ if (ps == null) {
+ return defaultName;
+ }
+ String value = ps.getValue(Type.STRING);
+ if (value.startsWith(".")) {
+ value = ":ignore_" + defaultName;
+ }
+ return value;
+ }
+
public KnnSearchParameters getKnnSearchParameters() {
return knnSearchParameters;
}
diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
index d7465632a7..bb9149c0fd 100644
--- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
+++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java
@@ -204,11 +204,35 @@ class ElasticIndexWriter implements FulltextIndexWriter<ElasticDocument> {
if (ese.status() == 400 && ese.getMessage().contains("resource_already_exists_exception")) {
LOG.warn("Index {} already exists. Ignoring error", indexName);
} else {
+ LOG.warn("Failed to create index {}", indexName, ese);
+ StringBuilder sb = new StringBuilder();
+ int old = JsonpUtils.maxToStringLength();
+ try {
+ JsonpUtils.maxToStringLength(1_000_000);
+ JsonpUtils.toString(request, sb);
+ String[] array = splitLargeString(sb.toString(), 1024);
+ for (int i = 0; i < array.length; i++) {
+ LOG.warn("request chunk[{}] = {}", i, array[i]);
+ }
+ } finally {
+ JsonpUtils.maxToStringLength(old);
+ }
throw ese;
}
}
}
+ public static String[] splitLargeString(String largeString, int chunkSize) {
+ int totalChunks = (largeString.length() + chunkSize - 1) / chunkSize;
+ String[] array = new String[totalChunks];
+ for (int i = 0; i < totalChunks; i++) {
+ int start = i * chunkSize;
+ int end = Math.min(start + chunkSize, largeString.length());
+ array[i] = largeString.substring(start, end);
+ }
+ return array;
+ }
+
private void enableIndex() throws IOException {
ElasticsearchIndicesClient client = elasticConnection.getClient().indices();
// check if index already exists
diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java
index 64c21c7377..f2055fe90e 100644
--- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java
+++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java
@@ -90,7 +90,7 @@ public class ElasticPropertyIndexTest extends ElasticAbstractQueryTest {
long docCountBreachingBulkSize = (bulkSize / docSize) + 1;
// 250 is the default flush limit for bulk processor
Assert.assertTrue(docCountBreachingBulkSize < 250);
- String random = RandomStringUtils.random(docSize, true, true);
+ String random = RandomStringUtils.insecure().next(docSize, true, true);
Tree test = root.getTree("/").addChild("test");
for (int i = 1; i <= docCountBreachingBulkSize; i++) {
@@ -175,6 +175,29 @@ public class ElasticPropertyIndexTest extends ElasticAbstractQueryTest {
});
}
+ // OAK-11530
+ @Test
+ public void propertyWithDot() throws Exception {
+ IndexDefinitionBuilder builder = createIndex();
+ builder.includedPaths("/test")
+ .indexRule("nt:base")
+ .property("test", "./test");
+ setIndex("test1", builder);
+ root.commit();
+
+ //add content
+ root.getTree("/").addChild("test").setProperty("test", "1");
+ root.commit();
+
+ String query = "select [jcr:path] from [nt:base] " +
+ "where test = '1'";
+
+ assertEventually(() -> {
+ String explanation = explain(query);
+ assertThat(explanation, containsString("no-index"));
+ });
+ }
+
@Test
public void emptyIndex() throws Exception {
setIndex("test1", createIndex("propa", "propb"));
diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriterTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriterTest.java
index ccd7dc2e05..2b58d80cfa 100644
--- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriterTest.java
+++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriterTest.java
@@ -27,6 +27,7 @@ import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import java.io.IOException;
+import java.util.Arrays;
import static org.apache.jackrabbit.oak.plugins.index.elastic.ElasticTestUtils.randomString;
import static org.hamcrest.CoreMatchers.not;
@@ -130,4 +131,17 @@ public class ElasticIndexWriterTest {
verify(bulkProcessorHandlerMock).update(anyString(), any(ElasticDocument.class));
}
+ @Test
+ public void splitLargeString() {
+ assertEquals("[a]",
+ Arrays.toString(ElasticIndexWriter.splitLargeString(
+ "a", 1024)));
+ assertEquals("[h, e, l, l, o, , w, o, r, l, d]",
+ Arrays.toString(ElasticIndexWriter.splitLargeString(
+ "hello world", 1)));
+ assertEquals("[he, ll, o , wo, rl, d]",
+ Arrays.toString(ElasticIndexWriter.splitLargeString(
+ "hello world", 2)));
+ }
+
}
diff --git a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
index 0ebe6b61eb..90001b9ac9 100644
--- a/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
+++ b/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
@@ -140,7 +140,7 @@ public class PropertyDefinition {
public PropertyDefinition(IndexingRule idxDefn, String nodeName, NodeState defn) {
this.nodeName = nodeName;
this.isRegexp = getOptionalValue(defn, PROP_IS_REGEX, false);
- this.name = getName(defn, nodeName);
+ this.name = getNamePropertyValue(defn, nodeName);
this.relative = isRelativeProperty(name);
this.boost = getOptionalValue(defn, FIELD_BOOST, DEFAULT_BOOST);
this.weight = getOptionalValue(defn, PROP_WEIGHT, DEFAULT_PROPERTY_WEIGHT);
@@ -310,8 +310,7 @@ public class PropertyDefinition {
}
}
-
- private static String getName(NodeState definition, String defaultName) {
+ protected String getNamePropertyValue(NodeState definition, String defaultName) {
PropertyState ps = definition.getProperty(FulltextIndexConstants.PROP_NAME);
return ps == null ? defaultName : ps.getValue(Type.STRING);
}