This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch OAK-11457
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 03c03e742434098983bd3456af45f081c9074d6d
Author: Thomas Mueller <[email protected]>
AuthorDate: Wed Feb 5 18:02:55 2025 +0100

    OAK-11457 Tree store sometimes contains bundled properties
---
 .../flatfile/pipelined/PipelinedTreeStoreTask.java | 73 +++++++++++++++++++++-
 .../store/RemovePropertiesOfBundledNodesTest.java  | 56 +++++++++++++++++
 2 files changed, 128 insertions(+), 1 deletion(-)

diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedTreeStoreTask.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedTreeStoreTask.java
index d8688eb4e2..9e855eda96 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedTreeStoreTask.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/pipelined/PipelinedTreeStoreTask.java
@@ -34,12 +34,17 @@ import java.util.concurrent.Callable;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.jackrabbit.guava.common.base.Stopwatch;
+import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
+import org.apache.jackrabbit.oak.commons.json.JsopReader;
+import org.apache.jackrabbit.oak.commons.json.JsopTokenizer;
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader;
 import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedSortBatchTask.Result;
 import org.apache.jackrabbit.oak.index.indexer.document.tree.TreeStore;
 import org.apache.jackrabbit.oak.index.indexer.document.tree.store.TreeSession;
 import org.apache.jackrabbit.oak.plugins.index.IndexingReporter;
 import org.apache.jackrabbit.oak.plugins.index.MetricsFormatter;
 import org.apache.jackrabbit.oak.plugins.index.MetricsUtils;
+import org.apache.jackrabbit.oak.spi.blob.MemoryBlobStore;
 import org.apache.jackrabbit.oak.stats.StatisticsProvider;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -207,7 +212,9 @@ public class PipelinedTreeStoreTask implements Callable<PipelinedSortBatchTask.R
                 int valueLength = buffer.getInt();
                 String value = new String(buffer.array(), buffer.arrayOffset() 
+ buffer.position(), valueLength, StandardCharsets.UTF_8);
                 textSize += entry.getPath().length() + value.length() + 2;
-                treeStore.putNode(entry.getPath(), value);
+                String path = entry.getPath();
+                value = removePropertiesOfBundledNodes(path, value);
+                treeStore.putNode(path, value);
             }
             session.checkpoint();
             unmergedRoots++;
@@ -230,4 +237,68 @@ public class PipelinedTreeStoreTask implements Callable<PipelinedSortBatchTask.R
         }
     }
 
+    /**
+     * If there are any, remove properties of bundled nodes (jcr:content/...) from the JSON-encoded node.
+     *
+     * @param path the path of the node (used for the de-serialization check and for logging)
+     * @param value the JSON-encoded node
+     * @return the value if it needs no cleaning, otherwise the JSON without the properties whose name contains "/"
+     */
+    public static String removePropertiesOfBundledNodes(String path, String value) {
+        if (value.indexOf("\"jcr:content/") < 0) {
+            return value;
+        }
+        // possibly a bundled property, but we are not sure: if de-serialization does not throw, it was a false positive
+        NodeStateEntryReader nodeReader = new NodeStateEntryReader(new MemoryBlobStore());
+        try {
+            nodeReader.read(path + "|" + value);
+            return value;
+        } catch (Exception e) {
+            LOG.warn("Path {} value {}", path, value);
+            JsopReader reader = new JsopTokenizer(value);
+            JsopBuilder writer = new JsopBuilder();
+            reader.read('{');
+            writer.object();
+            if (!reader.matches('}')) {
+                do {
+                    String key = reader.readString();
+                    reader.read(':');
+                    // skip properties that contain "/"
+                    boolean skip = key.indexOf('/') >= 0;
+                    if (!skip) {
+                        writer.key(key);
+                    }
+                    if (reader.matches('[')) {
+                        if (!skip) {
+                            writer.array();
+                        }
+                        // an empty array has no elements: matches(']') consumes the closing bracket, nothing is copied
+                        if (!reader.matches(']')) {
+                            do {
+                                String raw = reader.readRawValue();
+                                if (!skip) {
+                                    writer.encodedValue(raw);
+                                }
+                            } while (reader.matches(','));
+                            reader.read(']');
+                        }
+                        if (!skip) {
+                            writer.endArray();
+                        }
+                    } else {
+                        String raw = reader.readRawValue();
+                        if (!skip) {
+                            writer.encodedValue(raw);
+                        }
+                    }
+                } while (reader.matches(','));
+            }
+            reader.read('}');
+            writer.endObject();
+            String result = writer.toString();
+            LOG.warn("Cleaned {} : {}", path, result);
+            return result;
+        }
+    }
+
 }
diff --git a/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/RemovePropertiesOfBundledNodesTest.java b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/RemovePropertiesOfBundledNodesTest.java
new file mode 100644
index 0000000000..a4c8ea4f92
--- /dev/null
+++ b/oak-run-commons/src/test/java/org/apache/jackrabbit/oak/index/indexer/document/tree/store/RemovePropertiesOfBundledNodesTest.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.index.indexer.document.tree.store;
+
+import static org.junit.Assert.assertEquals;
+
+import 
org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedTreeStoreTask;
+import org.junit.Test;
+
+public class RemovePropertiesOfBundledNodesTest {
+
+    @Test
+    public void cleanUp() {
+        // modelled on the real-world case: the bundled jcr:content properties are removed
+        verify("{\"jcr:created\":\"dat:2020-05-06T17:15:13.971Z\",\"jcr:primaryType\":\"nam:nt:file\",\"jcr:createdBy\":\"admin\",\"jcr:content/jcr:lastModified\":\"dat:2025-01-21T03:37:42.095Z\",\"jcr:content/jcr:lastModifiedBy\":\"test\"}",
+                "{\"jcr:created\":\"dat:2020-05-06T17:15:13.971Z\",\"jcr:primaryType\":\"nam:nt:file\",\"jcr:createdBy\":\"admin\"}");
+
+        // generic entries without bundled properties are returned as they are
+        String empty = "{}";
+        verify(empty, empty);
+        String generic = "{\"c\":null,\"b\":\"x\",\"a\":123,\"d\":[1,2,null,\"x\"]}";
+        verify(generic, generic);
+        // false positive: "jcr:content/" occurs in a value, not in a property name
+        String falsePositive = "{\"c\":\"jcr:content/that\"}";
+        verify(falsePositive, falsePositive);
+
+        // generic entries that need cleaning: bundled properties are dropped, all others are kept
+        verify("{\"c\":null,\"jcr:content/this\":null,\"a\":123,\"jcr:content/that\":[1,2,null,\"x\"]}",
+                "{\"c\":null,\"a\":123}");
+        verify("{\"c\":null,\"jcr:content/this\":null,\"a\":123,\"array\":[1,2,null,\"x\"]}",
+                "{\"c\":null,\"a\":123,\"array\":[1,2,null,\"x\"]}");
+    }
+
+    /**
+     * Cleans the input for the path "/test" and compares the outcome with the expected JSON.
+     */
+    static void verify(String input, String expected) {
+        assertEquals(expected, PipelinedTreeStoreTask.removePropertiesOfBundledNodes("/test", input));
+    }
+}

Reply via email to