[ 
https://issues.apache.org/jira/browse/AVRO-2269?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16689326#comment-16689326
 ] 

ASF GitHub Bot commented on AVRO-2269:
--------------------------------------

rstata closed pull request #384: AVRO-2269 A few changes to lower the runtime 
variance of Perf.java tests
URL: https://github.com/apache/avro/pull/384
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/lang/java/ipc/src/test/java/org/apache/avro/io/Perf.java 
b/lang/java/ipc/src/test/java/org/apache/avro/io/Perf.java
index 860669468..9df56bb84 100644
--- a/lang/java/ipc/src/test/java/org/apache/avro/io/Perf.java
+++ b/lang/java/ipc/src/test/java/org/apache/avro/io/Perf.java
@@ -141,13 +141,14 @@ void add(List<TestDescriptor> typeList) {
   private static final int BYTES_PS_FIELD = 2;
   private static final int ENTRIES_PS_FIELD = 3;
   private static final int BYTES_PC_FIELD = 4;
-  private static final int MAX_FIELD = 4;
+  private static final int MIN_TIME_FIELD = 5;
+  private static final int MAX_FIELD_TAG = 5;
 
   private static void usage() {
     StringBuilder usage = new StringBuilder("Usage: Perf [-o <file>] [-c 
<spec>] { -nowrite | -noread | ");
     StringBuilder details = new StringBuilder();
     details.append(" -o file   (send output to a file)\n");
-    details.append(" -c [n][t][e][b][c] (format as no-header CSV; include 
Name, Time, Entries/sec, Bytes/sec, and/or bytes/Cycle; no spec=all fields)\n");
+    details.append(" -c [n][t][e][b][c][m] (format as no-header CSV; include 
Name, Time, Entries/sec, Bytes/sec, bytes/Cycle, and/or min time/op; no 
spec=all fields)\n");
     details.append(" -nowrite   (do not execute write tests)\n");
     details.append(" -noread   (do not execute write tests)\n");
     for (Map.Entry<String, List<TestDescriptor>> entry : BATCHES.entrySet()) {
@@ -179,6 +180,7 @@ public static void main(String[] args) throws Exception {
     String outputfilename = null;
     PrintStream out = System.out;
     boolean[] csvFormat = null;
+    String csvFormatString = null;
 
     for (int i = 0; i < args.length; i++) {
       String a = args[i];
@@ -200,17 +202,20 @@ public static void main(String[] args) throws Exception {
         continue;
       }
       if ("-c".equals(a)) {
-        if (i == args.length-1 || args[i+1].startsWith("-"))
-          csvFormat = new boolean[] { true, true, true, true, true };
-        else {
-          csvFormat = new boolean[5];
-          for (char c : args[++i].toCharArray())
+        if (i == args.length-1 || args[i+1].startsWith("-")) {
+          csvFormatString = "ntebcm"; // For diagnostics
+          csvFormat = new boolean[] { true, true, true, true, true, true };
+        } else {
+          csvFormatString = args[++i];
+          csvFormat = new boolean[MAX_FIELD_TAG+1];
+          for (char c : csvFormatString.toCharArray())
             switch (c) {
             case 'n': csvFormat[NAME_FIELD] = true; break;
             case 't': csvFormat[TIME_FIELD] = true; break;
             case 'e': csvFormat[BYTES_PS_FIELD] = true; break;
             case 'b': csvFormat[ENTRIES_PS_FIELD] = true; break;
             case 'c': csvFormat[BYTES_PC_FIELD] = true; break;
+            case 'm': csvFormat[MIN_TIME_FIELD] = true; break;
             default:
               usage();
               System.exit(1);
@@ -237,9 +242,12 @@ public static void main(String[] args) throws Exception {
       }
     }
     System.out.println("Executing tests: \n" + tests +  "\n readTests:" +
-        readTests + "\n writeTests:" + writeTests + "\n cycles=" + CYCLES);
+        readTests + "\n writeTests:" + writeTests + "\n cycles=" + CYCLES +
+        "\n count=" + (COUNT / 1000) + "K");
     if (out != System.out) System.out.println(" Writing to: " + 
outputfilename);
-    if (csvFormat != null) System.out.println(" in CSV format.");
+    if (csvFormat != null) System.out.println(" CSV format: " + 
csvFormatString);
+
+    TestResult tr = new TestResult();
 
     for (int k = 0; k < tests.size(); k++) {
       Test t = tests.get(k);
@@ -275,28 +283,41 @@ public static void main(String[] args) throws Exception {
           t.writeTest();
         }
       }
-      t.reset();
+
       // test
-      long s = 0;
       System.gc();
-      t.init();
       if (t.isReadTest() && readTests) {
+        tr.reset();
         for (int i = 0; i < t.cycles; i++) {
-          s += t.readTest();
+          tr.update(t.readTest());
         }
-        printResult(out, csvFormat, s, t, t.name + "Read");
+        printResult(out, csvFormat, tr, t, t.name + "Read");
       }
-      s = 0;
       if (t.isWriteTest() && writeTests) {
+        tr.reset();
         for (int i = 0; i < t.cycles; i++) {
-          s += t.writeTest();
+          tr.update(t.writeTest());
         }
-        printResult(out, csvFormat, s, t, t.name + "Write");
+        printResult(out, csvFormat, tr, t, t.name + "Write");
       }
       t.reset();
     }
   }
 
+  private static class TestResult {
+    public long totalTime;
+    public long minTime;
+    public void reset() {
+      totalTime = 0L;
+      minTime = Long.MAX_VALUE;
+    }
+    public long update(long t) {
+      totalTime += t;
+      minTime = Math.min(t, minTime);
+      return t;
+    }
+  }
+
   private static final void printHeader() {
     String header = String.format(
         "%60s     time    M entries/sec   M bytes/sec  bytes/cycle",
@@ -305,23 +326,25 @@ private static final void printHeader() {
   }
 
   private static final void printResult(PrintStream o, boolean[] csv,
-                                        long s, Test t, String name)
+                                        TestResult tr, Test t, String name)
   {
-    s /= 1000;
+    long s = tr.totalTime / 1000;
     double entries = (t.cycles * (double) t.count);
     double bytes = t.cycles * (double) t.encodedSize;
     StringBuilder result = new StringBuilder();
     if (csv != null) {
       boolean commaneeded = false;
-      for (int i = 0; i <= MAX_FIELD; i++) {
+      for (int i = 0; i <= MAX_FIELD_TAG; i++) {
+        if (! csv[i]) continue;
         if (commaneeded) result.append(",");
         else commaneeded = true;
         switch (i) {
         case NAME_FIELD: result.append(name); break;
         case TIME_FIELD: result.append(String.format("%d", (s/1000))); break;
         case BYTES_PS_FIELD: result.append(String.format("%.3f", (entries / 
s))); break;
-        case ENTRIES_PS_FIELD: result.append(String.format(".3%f", (bytes / 
s))); break;
+        case ENTRIES_PS_FIELD: result.append(String.format("%.3f", (bytes / 
s))); break;
         case BYTES_PC_FIELD: result.append(String.format("%d", 
t.encodedSize)); break;
+        case MIN_TIME_FIELD: result.append(String.format("%d", tr.minTime)); 
break;
         }
       }
     } else {
@@ -400,16 +423,16 @@ public String toString() {
 
     @Override
     public final long readTest() throws IOException {
-      long t = System.nanoTime();
       Decoder d = getDecoder();
+      long t = System.nanoTime();
       readInternal(d);
       return (System.nanoTime() - t);
     }
 
     @Override
     public final long writeTest() throws IOException {
-      long t = System.nanoTime();
       Encoder e = getEncoder();
+      long t = System.nanoTime();
       writeInternal(e);
       e.flush();
       return (System.nanoTime() - t);
@@ -428,8 +451,8 @@ protected Decoder newDecoder() {
     }
 
     protected Encoder newEncoder(ByteArrayOutputStream out) throws IOException 
{
-      Encoder e = encoder_factory.binaryEncoder(out, null);
-//    Encoder e = encoder_factory.directBinaryEncoder(out, null);
+//    Encoder e = encoder_factory.binaryEncoder(out, null);
+      Encoder e = encoder_factory.directBinaryEncoder(out, null);
 //    Encoder e = encoder_factory.blockingBinaryEncoder(out, null);
 //    Encoder e = new LegacyBinaryEncoder(out);
       return e;
@@ -1419,18 +1442,13 @@ protected Decoder getDecoder() {
     protected final SpecificDatumReader<T> reader;
     protected final SpecificDatumWriter<T> writer;
     private Object[] sourceData;
+    private T reuse;
 
     protected SpecificTest(String name, String writerSchema) throws 
IOException {
       super(name, writerSchema, 48);
       reader = newReader();
       writer = newWriter();
     }
-    protected SpecificDatumReader<T> getReader() {
-      return reader;
-    }
-    protected SpecificDatumWriter<T> getWriter() {
-      return writer;
-    }
     protected SpecificDatumReader<T> newReader() {
       return new SpecificDatumReader<>(schema);
     }
@@ -1444,6 +1462,7 @@ void genSourceData() {
       for (int i = 0; i < sourceData.length; i++) {
         sourceData[i] = genSingleRecord(r);
       }
+      reuse = genSingleRecord(r);
     }
 
     protected abstract T genSingleRecord(Random r);
@@ -1451,7 +1470,7 @@ void genSourceData() {
     @Override
     void readInternal(Decoder d) throws IOException {
       for (int i = 0; i < count; i++) {
-        getReader().read(null, d);
+        reader.read(reuse, d);
       }
     }
     @Override
@@ -1459,7 +1478,7 @@ void writeInternal(Encoder e) throws IOException {
       for (int i = 0; i < sourceData.length; i++) {
         @SuppressWarnings("unchecked")
         T rec = (T) sourceData[i];
-        getWriter().write(rec, e);
+        writer.write(rec, e);
       }
     }
     @Override


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Improve variances seen across Perf.java runs
> --------------------------------------------
>
>                 Key: AVRO-2269
>                 URL: https://issues.apache.org/jira/browse/AVRO-2269
>             Project: Apache Avro
>          Issue Type: Test
>          Components: java
>            Reporter: Raymie Stata
>            Assignee: Raymie Stata
>            Priority: Major
>
> In attempting to use Perf.java to show that proposed performance changes 
> actually improved performance, different runs of Perf.java using the exact 
> same code base resulted variances of 5% or greater – and often 10% or greater 
> – for about half the test cases. With variance this high within a code base, 
> it's impossible to tell if a proposed "improved" code base indeed improves 
> performance. I will post to the wiki and elsewhere some documents and scripts 
> I developed to reduce this variance. This JIRA is for changes to Perf.java 
> that reduce the variance. Specifically:
>  * Access the {{reader}} and {{writer}} instance variables directly in the 
> inner-loop for {{SpecificTest}}, as well as switched to a "reuse" object for 
> reading records, rather than constructing fresh objects for each read. Both 
> helped to significantly reduce variance for 
> {{FooBarSpecificRecordTestWrite}}, a major target of recent 
> performance-improvement efforts.
>  * Switched to {{DirectBinaryEncoder}} instead of {{BufferedBinaryEncoder}} 
> for write tests. Although this slowed writer-tests a bit, it reduced variance 
> a lot, especially for performance tests of primitives like booleans, making 
> it a better choice for measuring the performance-impact of code changes.
>  * Started the timer of a test after the encoder/decoder for the test is 
> constructed, rather than before. Helps a little.
>  * Added the ability to output the _minimum_ runtime of a test case across 
> multiple cycles (vs the total runtime across all cycles). This was inspired 
> by JVMSpec, which used to use a minimum.  I was able to reduce the variance 
> of total runtime enough to obviate the need for this metric, but since it's 
> helpful diagnostically, I left it in.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to