This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 08dfecd143 OAK-11781 Binary reference statistics are inaccurate for
very large repositories (#2359)
08dfecd143 is described below
commit 08dfecd1431701187af9252ff8a0877ab481be7d
Author: Thomas Mueller <[email protected]>
AuthorDate: Tue Jul 1 13:28:38 2025 +0300
OAK-11781 Binary reference statistics are inaccurate for very large
repositories (#2359)
---
.../oak/index/indexer/document/flatfile/analysis/StatsBuilder.java | 2 +-
.../indexer/document/flatfile/analysis/modules/DistinctBinarySize.java | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java
index 018670e97d..2bac1d025f 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/StatsBuilder.java
@@ -90,7 +90,7 @@ public class StatsBuilder {
collectors.add(new BinarySizeHistogram(1));
collectors.add(new TopLargestBinaries(10));
collectors.add(new DistinctBinarySizeHistogram(1));
- collectors.add(new DistinctBinarySize(16, 16));
+ collectors.add(new DistinctBinarySize(32, 128));
Profiler prof = null;
if (profiler) {
diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
index d1583007c7..272c7f357b 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.java
@@ -174,6 +174,7 @@ public class DistinctBinarySize implements StatsCollector {
double fpp = BloomFilter.calculateFpp(smallBinariesEstimatedCount,
bloomFilter.getBitCount(), bloomFilter.getK());
long bloomFilterEstimatedSize = bloomFilterMinSize;
bloomFilterEstimatedSize += fpp * bloomFilterIgnoredSize;
+ storage.add("small Bloom filter ffp*1000", (long) (fpp * 1000));
storage.add("small binaries count", smallBinariesEstimatedCount);
storage.add("small binaries HLL count",
smallBinariesEstimatedCountHLL);
storage.add("small binaries size", bloomFilterEstimatedSize);