On Fri, 4 Aug 2023 13:04:28 GMT, Yi Yang <[email protected]> wrote:
>> ### Motivation and proposal
>> Hi, heap dumps pause the application's execution (STW), which is a
>> well-known pain point. JDK-8252842 added parallel support to heap dump in an
>> attempt to alleviate this issue. However, all concurrent threads compete to
>> write heap data to the same file, and more memory is required to maintain
>> the concurrent buffer queue. In experiments, we did not observe a
>> significant performance improvement from that.
>>
>> The minor-pause solution, which is presented in this PR, is a two-phase
>> segmented heap dump:
>>
>> - Phase 1(STW): Concurrent threads directly write data to multiple heap
>> files.
>> - Phase 2(Non-STW): Merge multiple heap files into one complete heap dump
>> file. This process can happen outside safepoint.
>>
>> Now concurrent worker threads are not required to maintain a buffer queue,
>> which would result in more memory overhead, nor do they need to compete for
>> locks. The changes in the overall design are as follows:
>>
>> 
>> <p align="center">Fig1. Before</p>
>>
>> 
>> <p align="center">Fig2. After this patch</p>
>>
>> ### Performance evaluation
>> | memory | numOfThread | CompressionMode | STW | Total |
>> | -------| ----------- | --------------- | --- | ---- |
>> | 8g | 1 T | N | 15.612 | 15.612 |
>> | 8g | 32 T | N | 2.561725 | 14.498 |
>> | 8g | 32 T | C1 | 2.3084878 | 14.198 |
>> | 8g | 32 T | C2 | 10.9355128 | 21.882 |
>> | 8g | 96 T | N | 2.6790452 | 14.012 |
>> | 8g | 96 T | C1 | 2.3044796 | 3.589 |
>> | 8g | 96 T | C2 | 9.7585151 | 20.219 |
>> | 16g | 1 T | N | 26.278 | 26.278 |
>> | 16g | 32 T | N | 5.231374 | 26.417 |
>> | 16g | 32 T | C1 | 5.6946983 | 6.538 |
>> | 16g | 32 T | C2 | 21.8211105 | 41.133 |
>> | 16g | 96 T | N | 6.2445556 | 27.141 |
>> | 16g | 96 T | C1 | 4.6007096 | 6.259 |
>> | 16g | 96 T | C2 | 19.2965783 | 39.007 |
>> | 32g | 1 T | N | 48.149 | 48.149 |
>> | 32g | 32 T | N | 10.7734677 | 61.643 |
>> | 32g | 32 T | C1 | 10.1642097 | 10.903 |
>> | 32g | 32 T | C2 | 43.8407607 | 88.152 |
>> | 32g | 96 T | N | 13.1522042 | 61.432 |
>> | 32g | 96 T | C1 | 9.0954641 | 9.885 |
>> | 32g | 96 T | C2 | 38.9900931 | 80.574 |
>> | 64g | 1 T | N | 100.583 | 100.583 |
>> | 64g | 32 T | N | 20.9233744 | 134.701 |
>> | 64g | 32 T | C1 | 18.5023784 | 19.358 |
>> | 64g | 32 T | C2 | 86.4748377 | 172.707 |
>> | 64g | 96 T | N | 26.7374116 | 126.08 |
>> | 64g | ...
>
> Yi Yang has updated the pull request incrementally with one additional commit
> since the last revision:
>
> new can_parallel_dump
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;
import jdk.test.lib.Asserts;
import jdk.test.lib.JDKToolLauncher;
import jdk.test.lib.apps.LingeredApp;
import jdk.test.lib.dcmd.PidJcmdExecutor;
import jdk.test.lib.process.OutputAnalyzer;
import jdk.test.lib.process.ProcessTools;
import jdk.test.lib.hprof.HprofParser;
/**
* @test
* @bug 8306441
* @summary Verify the integrity of generated heap dump and capability of parallel dump
* @library /test/lib
* @run driver HeapDumpParallelTest
*/
public class HeapDumpParallelTest {
/**
 * Checks the outcome of a single heap dump request and cleans up the dump file.
 *
 * The jcmd output must report exit value 0 and "Heap dump file created"; the
 * target application's stdout must contain "[heapdump]" log output. The two
 * parallel-dump log lines must be present when a parallel dump is expected and
 * absent otherwise. Finally the dump file is handed to verifyHeapDump and
 * removed if it exists.
 *
 * @param dcmdOut      output of the jcmd GC.heap_dump invocation
 * @param app          the application that was dumped; its stdout is inspected
 * @param heapDumpFile the heap dump file to verify and then delete
 * @param expectSerial true if the dump is expected to have run serially
 * @throws IOException declared for reading the application's stdout
 */
private static void checkAndVerify(OutputAnalyzer dcmdOut, LingeredApp app,
        File heapDumpFile, boolean expectSerial) throws IOException {
    dcmdOut.shouldHaveExitValue(0);
    dcmdOut.shouldContain("Heap dump file created");

    OutputAnalyzer appLog = new OutputAnalyzer(app.getProcessStdout());
    appLog.shouldContain("[heapdump]");

    // A parallel dump is only expected when it was requested AND this
    // machine reports more than one available processor.
    boolean expectParallel = !expectSerial
            && Runtime.getRuntime().availableProcessors() > 1;
    String[] parallelMarkers = {
        "Dump heap objects in parallel",
        "Merge heap files complete"
    };
    for (String marker : parallelMarkers) {
        if (expectParallel) {
            appLog.shouldContain(marker);
        } else {
            appLog.shouldNotContain(marker);
        }
    }

    verifyHeapDump(heapDumpFile);

    // Remove the dump so the next request starts from a clean slate.
    if (heapDumpFile.exists()) {
        heapDumpFile.delete();
    }
}
/**
 * Starts a fresh LingeredApp to act as the heap dump target.
 *
 * The app is started with heapdump logging enabled (-Xlog:heapdump), a 512m
 * maximum heap, dynamic GC thread count disabled and ParallelGCThreads=2.
 * NOTE(review): the fixed GC thread count is presumably what makes a parallel
 * dump possible in the target VM -- confirm against the HotSpot heap dumper.
 *
 * @return the started application
 * @throws IOException if the application cannot be started
 */
private static LingeredApp launchApp() throws IOException {
    LingeredApp target = new LingeredApp();
    LingeredApp.startApp(
            target,
            "-Xlog:heapdump",
            "-Xmx512m",
            "-XX:-UseDynamicNumberOfGCThreads",
            "-XX:ParallelGCThreads=2");
    return target;
}
/**
 * Test entry point: exercises GC.heap_dump with several -parallel settings.
 *
 * An invalid thread count is sent to one long-lived application and must be
 * rejected; every other scenario runs against its own freshly launched
 * application via test(...).
 *
 * @param args unused
 * @throws Exception if any check fails or a process cannot be driven
 */
public static void main(String[] args) throws Exception {
    File heapDumpFile = new File("parallelHeapDump.bin");
    // Make sure a stale dump from an earlier run cannot interfere.
    if (heapDumpFile.exists()) {
        heapDumpFile.delete();
    }

    LingeredApp errorCaseApp = launchApp();
    try {
        // A negative thread count must be rejected with an error message.
        OutputAnalyzer rejected = attachJcmdHeapDump(
                heapDumpFile, errorCaseApp.getPid(), "-parallel=" + -1);
        rejected.shouldContain("Invalid number of parallel dump threads.");

        // 0 disables parallel dumping, so a serial dump is expected.
        test(heapDumpFile, "-parallel=" + 0, true);

        // A single thread is a serial dump.
        test(heapDumpFile, "-parallel=" + 1, true);

        // An oversized request is still expected to dump in parallel.
        test(heapDumpFile, "-parallel=" + Integer.MAX_VALUE, false);

        // Parallel dump combined with compression and overwrite.
        int cpuCount = Runtime.getRuntime().availableProcessors();
        test(heapDumpFile, "-gz=9 -overwrite -parallel=" + cpuCount, false);
    } finally {
        errorCaseApp.stopApp();
    }
}
/**
 * Runs one heap dump scenario against a newly launched application.
 *
 * @param heapDumpFile destination file for the heap dump
 * @param arg          option string passed to GC.heap_dump ahead of the file path
 * @param expectSerial true if the dump is expected to be performed serially
 * @throws Exception if launching, dumping or verification fails
 */
private static void test(File heapDumpFile, String arg, boolean expectSerial)
        throws Exception {
    LingeredApp target = launchApp();
    try {
        long pid = target.getPid();
        OutputAnalyzer jcmdResult = attachJcmdHeapDump(heapDumpFile, pid, arg);

        // Stop the app before checkAndVerify inspects its stdout.
        target.stopApp();
        checkAndVerify(jcmdResult, target, heapDumpFile, expectSerial);
    } finally {
        // Also covers the paths where the dump request itself threw.
        target.stopApp();
    }
}
/**
 * Issues a GC.heap_dump diagnostic command to the given process.
 *
 * The command has the form
 * {@code GC.heap_dump <arg> <absolute path of heapDumpFile>}, e.g.
 * {@code jcmd <pid> GC.heap_dump -parallel=cpucount <file_path>}.
 *
 * @param heapDumpFile   file the target VM should write the dump to
 * @param lingeredAppPid pid of the process to dump
 * @param arg            option string placed between the command name and the path
 * @return the output produced by the diagnostic command
 * @throws Exception if the command cannot be executed
 */
private static OutputAnalyzer attachJcmdHeapDump(File heapDumpFile,
        long lingeredAppPid, String arg) throws Exception {
    System.out.println("Testing pid " + lingeredAppPid);

    String command = "GC.heap_dump " + arg + " " + heapDumpFile.getAbsolutePath();
    PidJcmdExecutor jcmd = new PidJcmdExecutor(String.valueOf(lingeredAppPid));
    return jcmd.execute(command);
}
private static void verifyHeapDump(File dump) {
...as before...
-------------
PR Comment: https://git.openjdk.org/jdk/pull/13667#issuecomment-1669094080