This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-12133
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 9ba6d0b8acb31d53e3247986152f33b8bfc46c91
Author: Thomas Mueller <[email protected]>
AuthorDate: Wed Mar 11 11:22:04 2026 +0100

    OAK-12133 With the segment store, binary properties can not be aggregated
---
 .../plugins/index/lucene/LuceneDocumentMaker.java  |   9 +-
 .../run/query/BinaryStorageWithBlobStoreTest.java  | 159 ++++++++++++++
 .../IndexRegexPropertyWithBinaryExcludedTest.java  | 228 +++++++++++++++++++++
 .../index/elastic/index/ElasticDocumentMaker.java  |   7 +-
 .../oak/segment/SegmentPropertyState.java          |  35 +++-
 5 files changed, 434 insertions(+), 4 deletions(-)

diff --git 
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
 
b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
index 6192b54ae1..97756f04cb 100644
--- 
a/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
+++ 
b/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
@@ -131,11 +131,16 @@ public class LuceneDocumentMaker extends 
FulltextDocumentMaker<Document> {
             f = new DoubleField(pname, property.getValue(Type.DOUBLE, i), 
Field.Store.NO);
         } else if (tag == Type.BOOLEAN.tag()) {
             f = new StringField(pname, property.getValue(Type.BOOLEAN, 
i).toString(), Field.Store.NO);
+        } else if (tag == Type.BINARY.tag()) {
+            // ignore - never call getValue(Type.STRING) on a binary (see 
OAK-12133)
+            f = null;
         } else {
             f = new StringField(pname, property.getValue(Type.STRING, i), 
Field.Store.NO);
         }
 
-        doc.add(f);
+        if (f != null) {
+            doc.add(f);
+        }
     }
 
     @Override
@@ -294,7 +299,7 @@ public class LuceneDocumentMaker extends 
FulltextDocumentMaker<Document> {
                         new 
BytesRef(property.getValue(Type.BOOLEAN).toString()));
             } else if (tag == Type.STRING.tag()) {
                 String stringValue = property.getValue(Type.STRING);
-                // Truncate the value as lucene limits the length of a 
SortedDocValueField string to 
+                // Truncate the value as lucene limits the length of a 
SortedDocValueField string to
                 // STRING_PROPERTY_MAX_LENGTH(32766 bytes) and throws 
exception if over the limit
                 f = new SortedDocValuesField(name, getTruncatedBytesRef(name, 
stringValue, this.path,
                         STRING_PROPERTY_MAX_LENGTH));
diff --git 
a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/query/BinaryStorageWithBlobStoreTest.java
 
b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/query/BinaryStorageWithBlobStoreTest.java
new file mode 100644
index 0000000000..6550df1ab6
--- /dev/null
+++ 
b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/query/BinaryStorageWithBlobStoreTest.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run.query;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+
+import javax.jcr.Node;
+import javax.jcr.Property;
+import javax.jcr.Repository;
+import javax.jcr.Session;
+import javax.jcr.SimpleCredentials;
+
+import org.apache.jackrabbit.api.JackrabbitRepository;
+import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.jcr.Jcr;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
+import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
+import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.segment.file.FileStoreBuilder;
+import org.apache.jackrabbit.oak.segment.file.InvalidFileStoreVersionException;
+import org.apache.jackrabbit.oak.spi.blob.BlobStore;
+import org.apache.jackrabbit.oak.spi.security.user.UserConstants;
+import org.apache.jackrabbit.core.data.FileDataStore;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/**
+ * Test to verify that binary properties throw IllegalStateException with a 
clear error message
+ * when read as STRING - this happens when they're stored externally in 
BlobStore.
+ * Verifies the exception message includes the property name and helpful 
guidance (OAK-12133).
+ */
+public class BinaryStorageWithBlobStoreTest {
+
+    @Rule
+    public TemporaryFolder folder = new TemporaryFolder(new File("target"));
+
+    private Repository repository;
+    private FileStore fileStore;
+    private Session session;
+    private BlobStore blobStore;
+
+    @Before
+    public void setup() throws Exception {
+        // Create external BlobStore
+        FileDataStore fds = new FileDataStore();
+        fds.setMinRecordLength(0); // Store ALL binaries externally, even tiny 
ones
+        fds.init(folder.newFolder("blobstore").getAbsolutePath());
+        blobStore = new DataStoreBlobStore(fds);
+
+        fileStore = createFileStore();
+        repository = createRepository(fileStore, blobStore);
+        session = repository.login(new 
SimpleCredentials(UserConstants.DEFAULT_ADMIN_ID,
+                UserConstants.DEFAULT_ADMIN_ID.toCharArray()));
+    }
+
+    @After
+    public void tearDown() {
+        if (session != null) {
+            session.logout();
+        }
+        if (repository instanceof JackrabbitRepository) {
+            ((JackrabbitRepository) repository).shutdown();
+        }
+        if (fileStore != null) {
+            fileStore.close();
+        }
+    }
+
+    private FileStore createFileStore() throws IOException, 
InvalidFileStoreVersionException {
+        return FileStoreBuilder.fileStoreBuilder(folder.getRoot())
+                .withBlobStore(blobStore)
+                .withBinariesInlineThreshold(10) // Force binaries > 10 bytes 
to be external!
+                .build();
+    }
+
+    private Repository createRepository(FileStore fileStore, BlobStore 
blobStore) {
+        return new Jcr(new 
Oak(SegmentNodeStoreBuilders.builder(fileStore).build()))
+                .createRepository();
+    }
+
+    @Test
+    public void testSmallBinaryAsStringWithExternalBlobStore() throws 
Exception {
+        Node test = session.getRootNode().addNode("test", "nt:unstructured");
+
+        byte[] data = new byte[45];
+        for (int i = 0; i < data.length; i++) {
+            data[i] = (byte) i;
+        }
+
+        test.setProperty("binary1", session.getValueFactory().createBinary(
+                new ByteArrayInputStream(data)));
+
+        session.save();
+
+        Property prop = test.getProperty("binary1");
+
+        try {
+            prop.getString();
+            fail("Expected IllegalStateException when reading binary property 
as string");
+        } catch (IllegalStateException e) {
+            String message = e.getMessage();
+            assertTrue("Exception message should mention the property name 
'binary1', but was: " + message,
+                    message.contains("binary1"));
+            assertTrue("Exception message should mention 'Attempting to read 
binary property', but was: " + message,
+                    message.contains("Attempting to read binary property"));
+            assertTrue("Exception message should mention 'can fail if the 
binary is stored externally', but was: " + message,
+                    message.contains("can fail if the binary is stored 
externally"));
+        }
+    }
+
+    @Test
+    public void testBinaryAsStringWithExternalBlobStore() throws Exception {
+        Node test = session.getRootNode().addNode("test2", "nt:unstructured");
+
+        byte[] data = new byte[17 * 1024];
+
+        test.setProperty("binary2", session.getValueFactory().createBinary(
+                new ByteArrayInputStream(data)));
+
+        session.save();
+
+        Property prop = test.getProperty("binary2");
+
+        try {
+            prop.getString();
+            fail("Expected IllegalStateException when reading binary property 
as string");
+        } catch (IllegalStateException e) {
+            String message = e.getMessage();
+            assertTrue("Exception message should mention the property name 
'binary2', but was: " + message,
+                    message.contains("binary2"));
+            assertTrue("Exception message should mention 'Attempting to read 
binary property', but was: " + message,
+                    message.contains("Attempting to read binary property"));
+            assertTrue("Exception message should mention 'can fail if the 
binary is stored externally', but was: " + message,
+                    message.contains("can fail if the binary is stored 
externally"));
+        }
+    }
+}
diff --git 
a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/query/IndexRegexPropertyWithBinaryExcludedTest.java
 
b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/query/IndexRegexPropertyWithBinaryExcludedTest.java
new file mode 100644
index 0000000000..984743a36f
--- /dev/null
+++ 
b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/query/IndexRegexPropertyWithBinaryExcludedTest.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run.query;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.jcr.Node;
+import javax.jcr.PropertyType;
+import javax.jcr.Repository;
+import javax.jcr.RepositoryException;
+import javax.jcr.Session;
+import javax.jcr.SimpleCredentials;
+import javax.jcr.query.Query;
+import javax.jcr.query.QueryManager;
+import javax.jcr.query.QueryResult;
+import javax.jcr.query.Row;
+import javax.jcr.query.RowIterator;
+
+import org.apache.jackrabbit.api.JackrabbitRepository;
+import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.jcr.Jcr;
+import 
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider;
+import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
+import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.segment.file.FileStoreBuilder;
+import org.apache.jackrabbit.oak.segment.file.InvalidFileStoreVersionException;
+import org.apache.jackrabbit.oak.spi.blob.BlobStore;
+import org.apache.jackrabbit.oak.spi.security.user.UserConstants;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
+import org.apache.jackrabbit.core.data.FileDataStore;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/**
+ * Tests indexing of regex property patterns with binary type exclusion.
+ * Uses 17KB binaries to ensure they're stored externally in BlobStore
+ * (above the default binariesInlineThreshold of ~16KB).
+ */
+public class IndexRegexPropertyWithBinaryExcludedTest {
+
+    @Rule
+    public TemporaryFolder folder = new TemporaryFolder(new File("target"));
+
+    private Repository repository;
+    private FileStore fileStore;
+    private Session session;
+    private QueryManager queryManager;
+    private BlobStore blobStore;
+
+    @Before
+    public void setup() throws Exception {
+        FileDataStore fds = new FileDataStore();
+        fds.setMinRecordLength(4096);
+        fds.init(folder.newFolder("blobstore").getAbsolutePath());
+        blobStore = new DataStoreBlobStore(fds);
+
+        fileStore = createFileStore();
+        repository = createRepository(fileStore);
+        session = repository.login(new 
SimpleCredentials(UserConstants.DEFAULT_ADMIN_ID,
+                UserConstants.DEFAULT_ADMIN_ID.toCharArray()));
+        queryManager = session.getWorkspace().getQueryManager();
+    }
+
+    @After
+    public void tearDown() {
+        if (session != null) {
+            session.logout();
+        }
+        if (repository instanceof JackrabbitRepository) {
+            ((JackrabbitRepository) repository).shutdown();
+        }
+        if (fileStore != null) {
+            fileStore.close();
+        }
+    }
+
+    private FileStore createFileStore() throws IOException, 
InvalidFileStoreVersionException {
+        return FileStoreBuilder.fileStoreBuilder(folder.getRoot())
+                .withBlobStore(blobStore)
+                .build();
+    }
+
+    private Repository createRepository(FileStore fileStore) {
+        LuceneIndexProvider provider = new LuceneIndexProvider();
+        LuceneIndexEditorProvider editorProvider = new 
LuceneIndexEditorProvider();
+
+        return new Jcr(new 
Oak(SegmentNodeStoreBuilders.builder(fileStore).build()))
+                .with((org.apache.jackrabbit.oak.spi.query.QueryIndexProvider) 
provider)
+                .with((org.apache.jackrabbit.oak.spi.commit.Observer) provider)
+                .with(editorProvider)
+                .withAsyncIndexing("async", 1)
+                .createRepository();
+    }
+
+    @Test
+    public void indexRegexPropertyWithBinaryExcluded() throws Exception {
+        // Create index definition
+        Node oakIndex = session.getRootNode().getNode("oak:index");
+        Node index = oakIndex.addNode("testRegexBinary", 
"oak:QueryIndexDefinition");
+        index.setProperty("type", "lucene");
+        index.setProperty("async", new String[]{"async", "nrt"}, 
PropertyType.STRING);
+        index.setProperty("compatVersion", 2);
+        index.setProperty("tags", new String[]{"abc"}, PropertyType.STRING);
+        index.setProperty("selectionPolicy", "tag");
+        index.setProperty("includedPaths", "/content");
+        index.setProperty("queryPaths", "/content");
+
+        // indexRules
+        Node indexRules = index.addNode("indexRules", "nt:unstructured");
+        Node ntBase = indexRules.addNode("nt:base", "nt:unstructured");
+        ntBase.setProperty("includePropertyTypes",
+                new String[]{"String", "Name"}, PropertyType.STRING);
+        Node properties = ntBase.addNode("properties", "nt:unstructured");
+
+        // regex property for child/.*
+        Node childProp = properties.addNode("childProp", "nt:unstructured");
+        childProp.setProperty("name", ".*");
+        childProp.setProperty("propertyIndex", true);
+        childProp.setProperty("isRegexp", true);
+        childProp.setProperty("nodeScopeIndex", true);
+        childProp.setProperty("analyzed", true);
+        childProp.setProperty("useInExcerpt", true);
+        childProp.setProperty("useInSpellcheck", true);
+
+        session.save();
+
+        // Test content
+        Node content = session.getRootNode().addNode("content", 
"nt:unstructured");
+
+        Node one = content.addNode("one", "nt:unstructured");
+        one.setProperty("title", "test content");
+        Node oneChild = one.addNode("child", "nt:unstructured");
+        oneChild.setProperty("data", "searchable text");
+        // A 17KB binary to ensure it's stored externally in BlobStore
+        oneChild.setProperty("binary", session.getValueFactory().createBinary(
+                new ByteArrayInputStream(createLargeBinaryData(17 * 1024))));
+
+        Node two = content.addNode("two", "nt:unstructured");
+        two.setProperty("title", "another test");
+        Node twoChild = two.addNode("child", "nt:unstructured");
+        twoChild.setProperty("data", "searchable text");
+        twoChild.setProperty("binary", session.getValueFactory().createBinary(
+                new ByteArrayInputStream(createLargeBinaryData(17 * 1024))));
+
+        session.save();
+
+        // Query
+        String xpath = "/jcr:root/content//*[jcr:contains(., 'searchable')] 
option(index tag abc, traversal fail)";
+
+        // Wait for async indexing with retry
+        List<String> results = null;
+        AssertionError lastError = null;
+        for (int attempt = 0; attempt < 300; attempt++) {
+            try {
+                Query query = queryManager.createQuery("explain " + xpath, 
"xpath");
+                RowIterator rowIt = query.execute().getRows();
+                while (rowIt.hasNext()) {
+                    
assertTrue(rowIt.nextRow().toString().contains("lucene:testRegexBinary"));
+                }
+                query = queryManager.createQuery(xpath, "xpath");
+                QueryResult result = query.execute();
+                results = getResultPaths(result);
+
+                // Verify we found both child nodes
+                assertEquals("Expected 2 results", 2, results.size());
+                assertTrue("Expected /content/one/child in results", 
results.contains("/content/one/child"));
+                assertTrue("Expected /content/two/child in results", 
results.contains("/content/two/child"));
+
+                // Success - exit retry loop
+                return;
+            } catch (AssertionError e) {
+                lastError = e;
+                Thread.sleep(100);
+            }
+        }
+
+        if (lastError != null) {
+            throw new AssertionError("Query did not return expected results 
after 30 attempts. Last results: " + results, lastError);
+        }
+    }
+
+    /**
+     * Create a large binary data array of specified size.
+     * Uses a simple pattern to fill the data.
+     */
+    private byte[] createLargeBinaryData(int sizeInBytes) {
+        byte[] data = new byte[sizeInBytes];
+        for (int i = 0; i < sizeInBytes; i++) {
+            data[i] = (byte) (i % 256);
+        }
+        return data;
+    }
+
+    private List<String> getResultPaths(QueryResult result) throws 
RepositoryException {
+        List<String> paths = new ArrayList<>();
+        RowIterator rows = result.getRows();
+        while (rows.hasNext()) {
+            Row row = rows.nextRow();
+            paths.add(row.getPath());
+        }
+        return paths;
+    }
+}
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
index dd856dfd07..3929fe655e 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
@@ -190,11 +190,16 @@ public class ElasticDocumentMaker extends 
FulltextDocumentMaker<ElasticDocument>
                 f = property.getValue(Type.DOUBLE, i);
             } else if (tag == Type.BOOLEAN.tag()) {
                 f = property.getValue(Type.BOOLEAN, i).toString();
+            } else if (tag == Type.BINARY.tag()) {
+                // ignore - never call getValue(Type.STRING) on a binary (see 
OAK-12133)
+                f = null;
             } else {
                 f = property.getValue(Type.STRING, i);
             }
 
-            doc.addProperty(fieldName, f);
+            if (f != null) {
+                doc.addProperty(fieldName, f);
+            }
         } catch (Exception e) {
             if (!LOG_SILENCER.silence(LOG_KEY_COULD_NOT_CONVERT_PROPERTY)) {
                 LOG.warn(
diff --git 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentPropertyState.java
 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentPropertyState.java
index 68b5e18f81..d5bfffda1c 100644
--- 
a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentPropertyState.java
+++ 
b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentPropertyState.java
@@ -49,10 +49,13 @@ import javax.jcr.PropertyType;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.conditions.Validate;
+import org.apache.jackrabbit.oak.commons.log.LogSilencer;
 import org.apache.jackrabbit.oak.plugins.memory.AbstractPropertyState;
 import org.apache.jackrabbit.oak.plugins.value.Conversions;
 import org.apache.jackrabbit.oak.plugins.value.Conversions.Converter;
 import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A property, which can read a value or list record from a segment. It
@@ -62,6 +65,10 @@ import org.jetbrains.annotations.NotNull;
  * of type "LIST" (for arrays).
  */
 public class SegmentPropertyState extends Record implements PropertyState {
+    private static final Logger LOG = 
LoggerFactory.getLogger(SegmentPropertyState.class);
+
+    private static final LogSilencer LOG_SILENCER = new LogSilencer(10_000, 
64);
+
     @NotNull
     private final SegmentReader reader;
 
@@ -190,7 +197,33 @@ public class SegmentPropertyState extends Record 
implements PropertyState {
             return (T) reader.readBlob(id); // load binaries lazily
         }
 
-        String value = reader.readString(id);
+        // OAK-12133: Detect when trying to read a binary property as a string
+        Type<?> actualType = getType();
+        if (actualType.isArray()) {
+            actualType = actualType.getBaseType();
+        }
+        String message = null;
+        if (actualType == BINARY && type != BINARY) {
+            message = String.format(
+                "Attempting to read binary property '%s' as %s. " +
+                "This can fail if the binary is stored externally. " +
+                "Binary properties should be checked before getValue() is 
called.",
+                name, type);
+            if (!LOG_SILENCER.silence(name)) {
+                LOG.warn(message, new Exception("Stack trace"));
+            }
+        }
+
+        String value;
+        try {
+            value = reader.readString(id);
+        } catch (IllegalStateException e) {
+            if (message != null && e.getMessage() != null &&
+                    e.getMessage().contains("possibly trying to read a BLOB 
using getString")) {
+                throw new IllegalStateException(message, e);
+            }
+            throw e;
+        }
         if (type == STRING || type == URI || type == DATE
                 || type == NAME || type == PATH
                 || type == REFERENCE || type == WEAKREFERENCE) {

Reply via email to