dant3 commented on code in PR #7513: URL: https://github.com/apache/ignite-3/pull/7513#discussion_r2772871779
########## modules/page-memory/src/jmh/java/org/apache/ignite/internal/pagememory/benchmark/PageReplacementBenchmark.java: ########## @@ -0,0 +1,829 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.pagememory.benchmark; + +import static org.apache.ignite.internal.pagememory.PageIdAllocator.FLAG_DATA; + +import java.util.Random; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.ignite.internal.lang.IgniteInternalCheckedException; +import org.apache.ignite.internal.pagememory.TestPageIoModule.TestSimpleValuePageIo; +import org.apache.ignite.internal.pagememory.configuration.ReplacementMode; +import org.apache.ignite.internal.pagememory.persistence.checkpoint.CheckpointProgress; +import org.apache.ignite.internal.pagememory.persistence.checkpoint.CheckpointState; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import 
org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.infra.ThreadParams; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * JMH benchmark measuring page cache replacement policy performance in PersistentPageMemory. + * + * <p>This benchmark measures page cache miss handling, replacement overhead, and access latency + * under memory pressure using realistic access patterns (Zipfian distribution modeling hot/cold data). + * + * <p><b>What This Benchmark Measures:</b> + * <ul> + * <li>Page cache miss and replacement overhead for different policies</li> + * <li>Throughput and latency under single-threaded and multi-threaded contention</li> + * <li>Policy effectiveness at retaining hot pages under memory pressure</li> + * </ul> + * + * <p><b>Configuration:</b> + * <ul> + * <li>Region: 20 MiB (5,120 pages at 4 KiB/page) - sized to force replacements without excessive setup time</li> + * <li>Partitions: 16 (power-of-2 for efficient modulo, representative of small-to-medium tables)</li> + * <li>Cache Pressure: Working set exceeds capacity by 1.2x, 2x, or 4x</li> + * <li>Access Pattern: Zipfian with skew=0.99 (commonly used in YCSB)</li> + * </ul> + * + * <p><b>Important Limitations:</b> + * <ul> + * <li><b>Read-only workload:</b> Does not test dirty page eviction, write amplification, + * or checkpoint blocking. 
Real workloads have 10-30% writes which significantly + * impact replacement behavior.</li> + * <li><b>Checkpoint lock held during measurement:</b> Each benchmark iteration acquires + * checkpoint read lock before measurement and releases after (see ThreadState.setupIteration). + * This eliminates checkpoint contention from measurements but means we don't measure + * checkpoint lock acquisition overhead (100ms+ blocking every 30-60s in production). + * Note: Lock is NOT held during setup - checkpoints occur between allocation batches.</li> + * <li><b>Single access pattern:</b> Tests pure Zipfian only. Real workloads mix + * hot key access, range scans, and bulk operations.</li> + * <li><b>Pre-warmed cache:</b> Does not measure cold start or cache warming behavior.</li> + * </ul> + * + * <p><b>Results Interpretation:</b> This benchmark measures page cache replacement efficiency + * in isolation. Production performance will be lower due to checkpoint contention, dirty page + * writes, and mixed workload patterns not represented here. + * + * <p><b>Benchmark Matrix:</b> 3 policies × 3 pressures × 4 methods = 36 configurations + * (approximately 18-20 minutes depending on hardware) + */ +@Warmup(iterations = 5, time = 2) +@Measurement(iterations = 10, time = 2) +@Fork(1) +@State(Scope.Benchmark) +public class PageCacheReplacementBenchmark extends PersistentPageMemoryBenchmarkBase { + + /** + * Small region size to force page replacements quickly. + * Using literal instead of Constants.MiB for invariance. + */ + private static final long SMALL_REGION_SIZE = 20L * 1024 * 1024; // 20 MiB + + /** Expected page size for this benchmark. Must match PAGE_SIZE from base class. */ + private static final int EXPECTED_PAGE_SIZE = 4 * 1024; // 4 KiB + + /** + * Zipfian skew parameter: 0.99 creates strong hot/cold separation. + * This value is commonly used in YCSB benchmarks. + */ + private static final double ZIPFIAN_SKEW = 0.99; + + /** Base random seed for reproducibility. 
*/ + private static final long BASE_SEED = 42L; + + /** + * Warmup seed (distinct from thread seeds to avoid access pattern overlap). + * Using 999999 keeps it far away from thread seeds but below the thread spacing prime (1000003). + */ + private static final long WARMUP_SEED = BASE_SEED + 999999L; + + /** + * Number of partitions: 16 is representative of small-to-medium tables. + * (Small tables have 10-100 partitions, medium 100-1000, large 1000+) + */ + private static final int PARTITION_COUNT = 16; + + /** + * Warmup multiplier: Access 110% of capacity to ensure cache fills completely + * and initial replacements begin before measurement starts. + */ + private static final double WARMUP_MULTIPLIER = 1.1; + + /** + * Minimum working set size as fraction of region capacity. + * Just a sanity check - even LOW pressure is 120%, so anything below 10% is a setup failure. + */ + private static final double MIN_WORKING_SET_RATIO = 0.1; + + /** Page replacement policy to test. */ + @Param({"CLOCK", "SEGMENTED_LRU", "RANDOM_LRU"}) + public ReplacementMode replacementModeParam; + + /** + * Cache pressure level: working set size as multiple of capacity. + * Higher pressure = more cache misses and replacements = better policy differentiation. + */ + @Param({"LOW", "MEDIUM", "HIGH"}) + public CachePressure cachePressure; + + /** + * Pre-allocated page IDs for the working set. + * Shared across all threads (populated in setup, read-only during benchmark). + */ + private long[] pageIds; + + /** Computed region capacity in pages. */ + private long regionCapacityPages; + + /** Working set size. */ + private int workingSetSize; + + /** + * Metrics snapshot before iteration (captured once per iteration at benchmark level). + * Volatile because it's written in setupIteration() and read in tearDownIteration(). + */ + private volatile MetricsSnapshot beforeMetrics; + + /** + * Volatile accumulator to prevent warmup reads from being optimized away. 
+ * This field is write-only - never read after warmupCache() completes. + * Its purpose is to force the JIT compiler to keep all warmup read operations. + */ + @SuppressWarnings("unused") + private volatile long warmupAccumulator; + + /** + * Cache pressure levels determining working set size relative to capacity. + */ + public enum CachePressure { + /** Low pressure: 1.2× capacity. Minimal replacement activity. */ + LOW(1.2), Review Comment: The idea is to apply different amounts of pressure to get different behaviour. These numbers provide different amounts of pressure - small, medium and large. Basically I selected low and large, where large is big enough for the purpose of the benchmark but small enough not to make the dataset excessively big. The small one is about as close to 1 as we can get (with 1 we would have no replacement activity at all), and everything else is somewhere in between. If you have a proposal for other numbers, you are welcome. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
