[jira] [Commented] (AVRO-2269) Improve variances seen across Perf.java runs
[ https://issues.apache.org/jira/browse/AVRO-2269?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16689326#comment-16689326 ] ASF GitHub Bot commented on AVRO-2269: -- rstata closed pull request #384: AVRO-2269 A few changes to lower the runtime variance of Perf.java tests URL: https://github.com/apache/avro/pull/384 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/lang/java/ipc/src/test/java/org/apache/avro/io/Perf.java b/lang/java/ipc/src/test/java/org/apache/avro/io/Perf.java index 860669468..9df56bb84 100644 --- a/lang/java/ipc/src/test/java/org/apache/avro/io/Perf.java +++ b/lang/java/ipc/src/test/java/org/apache/avro/io/Perf.java @@ -141,13 +141,14 @@ void add(List typeList) { private static final int BYTES_PS_FIELD = 2; private static final int ENTRIES_PS_FIELD = 3; private static final int BYTES_PC_FIELD = 4; - private static final int MAX_FIELD = 4; + private static final int MIN_TIME_FIELD = 5; + private static final int MAX_FIELD_TAG = 5; private static void usage() { StringBuilder usage = new StringBuilder("Usage: Perf [-o ] [-c ] { -nowrite | -noread | "); StringBuilder details = new StringBuilder(); details.append(" -o file (send output to a file)\n"); -details.append(" -c [n][t][e][b][c] (format as no-header CSV; include Name, Time, Entries/sec, Bytes/sec, and/or bytes/Cycle; no spec=all fields)\n"); +details.append(" -c [n][t][e][b][c][m] (format as no-header CSV; include Name, Time, Entries/sec, Bytes/sec, bytes/Cycle, and/or min time/op; no spec=all fields)\n"); details.append(" -nowrite (do not execute write tests)\n"); details.append(" -noread (do not execute write tests)\n"); for (Map.Entry> entry : BATCHES.entrySet()) { @@ -179,6 +180,7 @@ public static void main(String[] args) throws Exception { 
String outputfilename = null; PrintStream out = System.out; boolean[] csvFormat = null; +String csvFormatString = null; for (int i = 0; i < args.length; i++) { String a = args[i]; @@ -200,17 +202,20 @@ public static void main(String[] args) throws Exception { continue; } if ("-c".equals(a)) { -if (i == args.length-1 || args[i+1].startsWith("-")) - csvFormat = new boolean[] { true, true, true, true, true }; -else { - csvFormat = new boolean[5]; - for (char c : args[++i].toCharArray()) +if (i == args.length-1 || args[i+1].startsWith("-")) { + csvFormatString = "ntebcm"; // For diagnostics + csvFormat = new boolean[] { true, true, true, true, true, true }; +} else { + csvFormatString = args[++i]; + csvFormat = new boolean[MAX_FIELD_TAG+1]; + for (char c : csvFormatString.toCharArray()) switch (c) { case 'n': csvFormat[NAME_FIELD] = true; break; case 't': csvFormat[TIME_FIELD] = true; break; case 'e': csvFormat[BYTES_PS_FIELD] = true; break; case 'b': csvFormat[ENTRIES_PS_FIELD] = true; break; case 'c': csvFormat[BYTES_PC_FIELD] = true; break; +case 'm': csvFormat[MIN_TIME_FIELD] = true; break; default: usage(); System.exit(1); @@ -237,9 +242,12 @@ public static void main(String[] args) throws Exception { } } System.out.println("Executing tests: \n" + tests + "\n readTests:" + -readTests + "\n writeTests:" + writeTests + "\n cycles=" + CYCLES); +readTests + "\n writeTests:" + writeTests + "\n cycles=" + CYCLES + +"\n count=" + (COUNT / 1000) + "K"); if (out != System.out) System.out.println(" Writing to: " + outputfilename); -if (csvFormat != null) System.out.println(" in CSV format."); +if (csvFormat != null) System.out.println(" CSV format: " + csvFormatString); + +TestResult tr = new TestResult(); for (int k = 0; k < tests.size(); k++) { Test t = tests.get(k); @@ -275,28 +283,41 @@ public static void main(String[] args) throws Exception { t.writeTest(); } } - t.reset(); + // test - long s = 0; System.gc(); - t.init(); if (t.isReadTest() && readTests) { +tr.reset(); 
for (int i = 0; i < t.cycles; i++) { - s += t.readTest(); + tr.update(t.readTest()); } -printResult(out, csvFormat, s, t, t.name + "Read"); +printResult(out, csvFormat, tr, t, t.name + "Read"); } - s = 0; if (t.isWriteTest() && writeTests) { +tr.reset(); for (int i = 0; i < t.cycles; i++) { - s += t.writeTest(); +
[jira] [Commented] (AVRO-2269) Improve variances seen across Perf.java runs
[ https://issues.apache.org/jira/browse/AVRO-2269?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16689325#comment-16689325 ] ASF GitHub Bot commented on AVRO-2269: -- rstata commented on issue #384: AVRO-2269 A few changes to lower the runtime variance of Perf.java tests URL: https://github.com/apache/avro/pull/384#issuecomment-439372811 I'm closing this pull request for now while I look at some performance results more carefully. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Improve variances seen across Perf.java runs > > > Key: AVRO-2269 > URL: https://issues.apache.org/jira/browse/AVRO-2269 > Project: Apache Avro > Issue Type: Test > Components: java >Reporter: Raymie Stata >Assignee: Raymie Stata >Priority: Major > > In attempting to use Perf.java to show that proposed performance changes > actually improved performance, different runs of Perf.java using the exact > same code base resulted in variances of 5% or greater – and often 10% or greater > – for about half the test cases. With variance this high within a code base, > it's impossible to tell if a proposed "improved" code base indeed improves > performance. I will post to the wiki and elsewhere some documents and scripts > I developed to reduce this variance. This JIRA is for changes to Perf.java > that reduce the variance. Specifically: > * Accessed the {{reader}} and {{writer}} instance variables directly in the > inner-loop for {{SpecificTest}}, and switched to a "reuse" object for > reading records, rather than constructing fresh objects for each read. Both > helped to significantly reduce variance for > {{FooBarSpecificRecordTestWrite}}, a major target of recent > performance-improvement efforts.
> * Switched to {{DirectBinaryEncoder}} instead of {{BufferedBinaryEncoder}} > for write tests. Although this slowed writer-tests a bit, it reduced variance > a lot, especially for performance tests of primitives like booleans, making > it a better choice for measuring the performance-impact of code changes. > * Started the timer of a test after the encoder/decoder for the test is > constructed, rather than before. Helps a little. > * Added the ability to output the _minimum_ runtime of a test case across > multiple cycles (vs the total runtime across all cycles). This was inspired > by JVMSpec, which used to use a minimum. I was able to reduce the variance > of total runtime enough to obviate the need for this metric, but since it's > helpful diagnostically, I left it in. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (AVRO-2269) Improve variances seen across Perf.java runs
[ https://issues.apache.org/jira/browse/AVRO-2269?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16689215#comment-16689215 ] ASF GitHub Bot commented on AVRO-2269: -- rstata opened a new pull request #384: AVRO-2269 A few changes to lower the runtime variance of Perf.java tests URL: https://github.com/apache/avro/pull/384 The changes herein help reduce the number of "high-variance" tests (ie, test-cases that vary more than 5% from run to run) from almost 50% of the test cases to around 10%. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Improve variances seen across Perf.java runs > > > Key: AVRO-2269 > URL: https://issues.apache.org/jira/browse/AVRO-2269 > Project: Apache Avro > Issue Type: Test > Components: java >Reporter: Raymie Stata >Assignee: Raymie Stata >Priority: Major > > In attempting to use Perf.java to show that proposed performance changes > actually improved performance, different runs of Perf.java using the exact > same code base resulted in variances of 5% or greater – and often 10% or greater > – for about half the test cases. With variance this high within a code base, > it's impossible to tell if a proposed "improved" code base indeed improves > performance. I will post to the wiki and elsewhere some documents and scripts > I developed to reduce this variance. This JIRA is for changes to Perf.java > that reduce the variance. Specifically: > * Accessed the {{reader}} and {{writer}} instance variables directly in the > inner-loop for {{SpecificTest}}, and switched to a "reuse" object for > reading records, rather than constructing fresh objects for each read. Both > helped to significantly reduce variance for > {{FooBarSpecificRecordTestWrite}}, a major target of recent > performance-improvement efforts.
> * Switched to {{DirectBinaryEncoder}} instead of {{BufferedBinaryEncoder}} > for write tests. Although this slowed writer-tests a bit, it reduced variance > a lot, especially for performance tests of primitives like booleans, making > it a better choice for measuring the performance-impact of code changes. > * Started the timer of a test after the encoder/decoder for the test is > constructed, rather than before. Helps a little. > * Added the ability to output the _minimum_ runtime of a test case across > multiple cycles (vs the total runtime across all cycles). This was inspired > by JVMSpec, which used to use a minimum. I was able to reduce the variance > of total runtime enough to obviate the need for this metric, but since it's > helpful diagnostically, I left it in. -- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (AVRO-2269) Improve variances seen across Perf.java runs
[ https://issues.apache.org/jira/browse/AVRO-2269?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16689164#comment-16689164 ] Raymie Stata commented on AVRO-2269: I took a look at JMH. I think it'd be great to convert `Perf.java` over to JMH. I didn't pursue it because I couldn't find good enough docs on JMH to feel comfortable using it myself. The forthcoming patch I have for AVRO-2269 makes changes that are orthogonal to what JMH does. JMH does things like warm up the JIT and various caches, and so forth, and it runs tests a dynamic number of times in order to "seek" stable statistics on performance metrics. The current `Perf.main` does some of this already – I didn't touch any of that code – but JMH seems to do a much more professional job of it. Thus, again, it'd be great to convert `Perf.java` to JMH. That said, while JMH might do a pretty good job of finding the "true" running time of a high-variance piece of code, it doesn't turn a high-variance piece of code into a low-variance one. The forthcoming patch for AVRO-2269 does the latter – it tries to reduce the inherent variance of the tests (for example, by reducing the allocations done for `FooBarSpecificRecord` tests). JMH together with this forthcoming patch would be a great combination. I just submitted a pull request for AVRO-2268 containing a little bug fix that I want to depend upon, but which is pretty independent of the changes I have for AVRO-2269. If someone could pull AVRO-2268, I'd like to rebase onto that change before submitting the AVRO-2269 patch.
> Improve variances seen across Perf.java runs > > > Key: AVRO-2269 > URL: https://issues.apache.org/jira/browse/AVRO-2269 > Project: Apache Avro > Issue Type: Test > Components: java >Reporter: Raymie Stata >Assignee: Raymie Stata >Priority: Major > > In attempting to use Perf.java to show that proposed performance changes > actually improved performance, different runs of Perf.java using the exact > same code base resulted in variances of 5% or greater – and often 10% or greater > – for about half the test cases. With variance this high within a code base, > it's impossible to tell if a proposed "improved" code base indeed improves > performance. I will post to the wiki and elsewhere some documents and scripts > I developed to reduce this variance. This JIRA is for changes to Perf.java > that reduce the variance. Specifically: > * Accessed the {{reader}} and {{writer}} instance variables directly in the > inner-loop for {{SpecificTest}}, and switched to a "reuse" object for > reading records, rather than constructing fresh objects for each read. Both > helped to significantly reduce variance for > {{FooBarSpecificRecordTestWrite}}, a major target of recent > performance-improvement efforts. > * Switched to {{DirectBinaryEncoder}} instead of {{BufferedBinaryEncoder}} > for write tests. Although this slowed writer-tests a bit, it reduced variance > a lot, especially for performance tests of primitives like booleans, making > it a better choice for measuring the performance-impact of code changes. > * Started the timer of a test after the encoder/decoder for the test is > constructed, rather than before. Helps a little. > * Added the ability to output the _minimum_ runtime of a test case across > multiple cycles (vs the total runtime across all cycles). This was inspired > by JVMSpec, which used to use a minimum. I was able to reduce the variance > of total runtime enough to obviate the need for this metric, but since it's > helpful diagnostically, I left it in.
-- This message was sent by Atlassian JIRA (v7.6.3#76005)
[jira] [Commented] (AVRO-2269) Improve variances seen across Perf.java runs
[ https://issues.apache.org/jira/browse/AVRO-2269?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16688925#comment-16688925 ] Zoltan Farkas commented on AVRO-2269: - Instead of re-inventing the wheel, Perf.java should be rewritten to use JMH (https://java-performance.info/jmh/). See https://github.com/zolyfarkas/benchmarks for an example of how to use JMH (there is also a benchmark for one of my experiments: https://github.com/zolyfarkas/benchmarks/blob/master/src/test/java/org/spf4j/avro/GenericRecordBenchmark.java). The example shows how you can run the benchmarks with a profiler (Java Flight Recorder, stack sampler), so that you can even have some data to look at and optimize... > Improve variances seen across Perf.java runs > > > Key: AVRO-2269 > URL: https://issues.apache.org/jira/browse/AVRO-2269 > Project: Apache Avro > Issue Type: Test > Components: java >Reporter: Raymie Stata >Assignee: Raymie Stata >Priority: Major > > In attempting to use Perf.java to show that proposed performance changes > actually improved performance, different runs of Perf.java using the exact > same code base resulted in variances of 5% or greater – and often 10% or greater > – for about half the test cases. With variance this high within a code base, > it's impossible to tell if a proposed "improved" code base indeed improves > performance. I will post to the wiki and elsewhere some documents and scripts > I developed to reduce this variance. This JIRA is for changes to Perf.java > that reduce the variance. Specifically: > * Accessed the {{reader}} and {{writer}} instance variables directly in the > inner-loop for {{SpecificTest}}, and switched to a "reuse" object for > reading records, rather than constructing fresh objects for each read. Both > helped to significantly reduce variance for > {{FooBarSpecificRecordTestWrite}}, a major target of recent > performance-improvement efforts.
> * Switched to {{DirectBinaryEncoder}} instead of {{BufferedBinaryEncoder}} > for write tests. Although this slowed writer-tests a bit, it reduced variance > a lot, especially for performance tests of primitives like booleans, making > it a better choice for measuring the performance-impact of code changes. > * Started the timer of a test after the encoder/decoder for the test is > constructed, rather than before. Helps a little. > * Added the ability to output the _minimum_ runtime of a test case across > multiple cycles (vs the total runtime across all cycles). This was inspired > by JVMSpec, which used to use a minimum. I was able to reduce the variance > of total runtime enough to obviate the need for this metric, but since it's > helpful diagnostically, I left it in. -- This message was sent by Atlassian JIRA (v7.6.3#76005)