This is an automated email from the ASF dual-hosted git repository. thomasm pushed a commit to branch OAK-11478 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 9397c54439fd0ba92f5377c89b1afbc0f112e0ab Author: Thomas Mueller <[email protected]> AuthorDate: Wed Feb 12 14:12:06 2025 +0100 OAK-11478 Node store statistics: support the tree store --- .../document/flatfile/analysis/StatsBuilder.java | 31 ++++++-- .../flatfile/analysis/stream/NodeLineReader.java | 2 +- .../analysis/stream/NodeTreeStoreReader.java | 82 ++++++++++++++++++++++ 3 files changed, 109 insertions(+), 6 deletions(-) diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java index f87216d249..5709fd2d05 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java @@ -37,6 +37,7 @@ import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeLineReader; import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeStreamReader; import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeStreamReaderCompressed; +import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeTreeStoreReader; /** * Builder for commonly used statistics for flat file stores. @@ -53,8 +54,10 @@ public class StatsBuilder { public static void main(String... args) throws Exception { String fileName = null; String nodeNameFilter = null; + boolean profiler = false; boolean stream = false; boolean compressedStream = false; + boolean treeStore = false; for(int i = 0; i<args.length; i++) { String a = args[i]; if (a.equals("--fileName")) { @@ -65,6 +68,10 @@ public class StatsBuilder { stream = true; } else if (a.equals("--compressedStream")) { compressedStream = true; + } else if (a.equals("--treeStore")) { + treeStore = true; + } else if (a.equals("--profiler")) { + profiler = true; } } if (fileName == null) { @@ -73,6 +80,8 @@ public class StatsBuilder { System.out.println(" --nodeNameFilter <filter> (node name filter for binaries; optional)"); System.out.println(" --stream (use a stream file; optional)"); System.out.println(" --compressedStream (use a compressed stream file; optional)"); + System.out.println(" --treeStore (the file is a tree store; optional)"); + System.out.println(" --profiler (enable the build-in profiler; optional)"); return; } System.out.println("Processing " + fileName); @@ -94,9 +103,14 @@ public class StatsBuilder { collectors.add(new DistinctBinarySizeHistogram(1)); collectors.add(new DistinctBinarySize(16, 16)); - Profiler prof = new Profiler().startCollecting(); + Profiler prof = null; + if (profiler) { + prof = new Profiler().startCollecting(); + } NodeDataReader reader; - if (compressedStream) { + if (treeStore) { + reader = NodeTreeStoreReader.open(fileName); + } else if (compressedStream) { reader = NodeStreamReaderCompressed.open(fileName); } else if (stream) { reader = NodeStreamReader.open(fileName); @@ -105,7 +119,9 @@ public class StatsBuilder { } collect(reader, collectors); - System.out.println(prof.getTop(10)); + if (profiler) { + System.out.println(prof.getTop(10)); + } System.out.println(); System.out.println("Results"); System.out.println(); @@ -123,8 +139,13 @@ public class StatsBuilder { if (node == null) { break; } - if (++lineCount % 1000000 == 0) { - System.out.println(lineCount + " lines; " + reader.getProgressPercent() + "%"); + if (++lineCount % 1_000_000 == 0) { + String msg = lineCount + " entries"; + int progressPercent = reader.getProgressPercent(); + if (progressPercent != 0) { + msg += "; " + progressPercent + "%"; + } + System.out.println(msg); } if (ONLY_READ) { continue; diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/stream/NodeLineReader.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/stream/NodeLineReader.java index 6a45bc52f8..dda7b80ad4 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/stream/NodeLineReader.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/stream/NodeLineReader.java @@ -101,7 +101,7 @@ public class NodeLineReader implements NodeDataReader, Closeable { return new NodeData(pathElements, parse(nodeJson)); } - private static List<NodeProperty> parse(String nodeData) { + static List<NodeProperty> parse(String nodeData) { ArrayList<NodeProperty> properties = new ArrayList<>(); JsonObject json = JsonObject.fromJson(nodeData, true); for(Entry<String, String> e : json.getProperties().entrySet()) { diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/stream/NodeTreeStoreReader.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/stream/NodeTreeStoreReader.java new file mode 100644 index 0000000000..4cc215ddfd --- /dev/null +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/stream/NodeTreeStoreReader.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.jackrabbit.oak.commons.PathUtils; +import org.apache.jackrabbit.oak.index.indexer.document.flatfile.NodeStateEntryReader; +import org.apache.jackrabbit.oak.index.indexer.document.tree.TreeStore; +import org.apache.jackrabbit.oak.spi.blob.BlobStore; + +/** + * A reader for tree store files. + */ +public class NodeTreeStoreReader implements NodeDataReader { + + private final TreeStore treeStore; + private final Iterator<String> pathIterator; + private final long fileSize; + + public static NodeDataReader open(String fileName) { + BlobStore blobStore = null; + NodeStateEntryReader entryReader = new NodeStateEntryReader(blobStore); + File file = new File(fileName); + TreeStore treeStore = new TreeStore("reader", file, entryReader, 32); + return new NodeTreeStoreReader(treeStore, file.length()); + } + + private NodeTreeStoreReader(TreeStore treeStore, long fileSize) { + this.treeStore = treeStore; + this.fileSize = fileSize; + this.pathIterator = treeStore.iteratorOverPaths(); + } + + @Override + public void close() throws IOException { + treeStore.close(); + } + + @Override + public NodeData readNode() throws IOException { + if (!pathIterator.hasNext()) { + return null; + } + String path = pathIterator.next(); + List<String> pathElements = new ArrayList<>(); + PathUtils.elements(path).forEach(pathElements::add); + String nodeJson = treeStore.getSession().get(path); + return new NodeData(pathElements, NodeLineReader.parse(nodeJson)); + } + + @Override + public long getFileSize() { + return fileSize; + } + + @Override + public int getProgressPercent() { + return 0; + } + +}
