This is an automated email from the ASF dual-hosted git repository.

thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 08dfecd143 OAK-11781 Binary reference statistics are inaccurate for very large repositories (#2359)
08dfecd143 is described below

commit 08dfecd1431701187af9252ff8a0877ab481be7d
Author: Thomas Mueller <[email protected]>
AuthorDate: Tue Jul 1 13:28:38 2025 +0300

    OAK-11781 Binary reference statistics are inaccurate for very large repositories (#2359)
---
 .../oak/index/indexer/document/flatfile/analysis/StatsBuilder.java      | 2 +-
 .../indexer/document/flatfile/analysis/modules/DistinctBinarySize.java  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java
index 018670e97d..2bac1d025f 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java
@@ -90,7 +90,7 @@ public class StatsBuilder {
         collectors.add(new BinarySizeHistogram(1));
         collectors.add(new TopLargestBinaries(10));
         collectors.add(new DistinctBinarySizeHistogram(1));
-        collectors.add(new DistinctBinarySize(16, 16));
+        collectors.add(new DistinctBinarySize(32, 128));
 
         Profiler prof = null;
         if (profiler) {
diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
index d1583007c7..272c7f357b 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
@@ -174,6 +174,7 @@ public class DistinctBinarySize implements StatsCollector {
         double fpp = BloomFilter.calculateFpp(smallBinariesEstimatedCount, 
bloomFilter.getBitCount(), bloomFilter.getK());
         long bloomFilterEstimatedSize = bloomFilterMinSize;
         bloomFilterEstimatedSize += fpp * bloomFilterIgnoredSize;
+        storage.add("small Bloom filter ffp*1000", (long) (fpp * 1000));
         storage.add("small binaries count", smallBinariesEstimatedCount);
         storage.add("small binaries HLL count", 
smallBinariesEstimatedCountHLL);
         storage.add("small binaries size", bloomFilterEstimatedSize);

Reply via email to