Repository: incubator-impala
Updated Branches:
  refs/heads/master ed5ec6772 -> 476f687b4


IMPALA-3727: Change microbenchmarks to use percentile-based reporting

This doesn't make each run more robust, but by running the benchmark
60 times and reporting the 10th, 50th, and 90th percentile, it shows
which changes to benchmark results are part of the variance and which
are actual improvements.

Change-Id: I8aa35789922e5019fbb77b0737c9ea19df038e3b
Reviewed-on: http://gerrit.cloudera.org:8080/3579
Reviewed-by: Jim Apple <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/476f687b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/476f687b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/476f687b

Branch: refs/heads/master
Commit: 476f687b4f29dee4da5e5e10251a546b8db9194f
Parents: 3810b7c
Author: Jim Apple <[email protected]>
Authored: Mon Jun 20 14:46:16 2016 -0700
Committer: Tim Armstrong <[email protected]>
Committed: Fri Jul 8 19:42:18 2016 -0700

----------------------------------------------------------------------
 be/src/benchmarks/bloom-filter-benchmark.cc   | 217 ++++++++++-----------
 be/src/benchmarks/string-compare-benchmark.cc |  30 ++-
 be/src/util/benchmark.cc                      |  67 +++++--
 be/src/util/benchmark.h                       |   4 +-
 4 files changed, 189 insertions(+), 129 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/476f687b/be/src/benchmarks/bloom-filter-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/bloom-filter-benchmark.cc 
b/be/src/benchmarks/bloom-filter-benchmark.cc
index 35d48fc..f38735e 100644
--- a/be/src/benchmarks/bloom-filter-benchmark.cc
+++ b/be/src/benchmarks/bloom-filter-benchmark.cc
@@ -38,103 +38,94 @@ using namespace impala;
 // As in bloom-filter.h, ndv refers to the number of unique items inserted 
into a filter
 // and fpp is the probability of false positives.
 //
-//
 // Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
 //
-// With AVX2:
-//
-// initialize:           Function     Rate (iters/ms)          Comparison
-// ----------------------------------------------------------------------
-//       ndv      10k fpp   10.0%                6607                  1X
-//       ndv      10k fpp    1.0%                3427             0.5187X
-//       ndv      10k fpp    0.1%                1203              0.182X
-//       ndv    1000k fpp   10.0%               5.273          0.0007982X
-//       ndv    1000k fpp    1.0%               3.297           0.000499X
-//       ndv    1000k fpp    0.1%                3.31           0.000501X
-//       ndv  100000k fpp   10.0%             0.08597          1.301e-05X
-//       ndv  100000k fpp    1.0%              0.0846           1.28e-05X
-//       ndv  100000k fpp    0.1%             0.04349          6.582e-06X
+// initialize:                Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//            ndv      10k fpp   10.0%           7.05e+03 7.27e+03 7.34e+03    
     1X         1X         1X
+//            ndv      10k fpp    1.0%           3.79e+03 3.93e+03 3.96e+03    
 0.538X     0.541X      0.54X
+//            ndv      10k fpp    0.1%           1.39e+03 1.42e+03 1.44e+03    
 0.198X     0.196X     0.196X
+//            ndv    1000k fpp   10.0%               4.62     4.78     4.81  
0.000655X  0.000658X  0.000655X
+//            ndv    1000k fpp    1.0%               2.49     2.55      2.6  
0.000354X  0.000351X  0.000354X
+//            ndv    1000k fpp    0.1%               2.45     2.55      2.6  
0.000347X  0.000351X  0.000354X
+//            ndv  100000k fpp   10.0%              0.035   0.0358    0.037  
4.96e-06X  4.93e-06X  5.04e-06X
+//            ndv  100000k fpp    1.0%             0.0347   0.0361   0.0372  
4.93e-06X  4.96e-06X  5.06e-06X
+//            ndv  100000k fpp    0.1%             0.0176   0.0181   0.0186   
2.5e-06X  2.49e-06X  2.53e-06X
 //
-// insert:               Function     Rate (iters/ms)          Comparison
-// ----------------------------------------------------------------------
-//       ndv      10k fpp   10.0%           1.858e+05                  1X
-//       ndv      10k fpp    1.0%           1.801e+05             0.9693X
-//       ndv      10k fpp    0.1%           1.869e+05              1.006X
-//       ndv    1000k fpp   10.0%           1.686e+05             0.9076X
-//       ndv    1000k fpp    1.0%           1.627e+05             0.8756X
-//       ndv    1000k fpp    0.1%            1.53e+05             0.8234X
-//       ndv  100000k fpp   10.0%           4.262e+04             0.2294X
-//       ndv  100000k fpp    1.0%           4.326e+04             0.2329X
-//       ndv  100000k fpp    0.1%           4.185e+04             0.2253X
+// With AVX2:
 //
-// find:                 Function     Rate (iters/ms)          Comparison
-// ----------------------------------------------------------------------
-// present ndv      10k fpp   10.0%           2.277e+05                  1X
-// absent  ndv      10k fpp   10.0%           2.258e+05             0.9914X
-// present ndv      10k fpp    1.0%           2.277e+05                  1X
-// absent  ndv      10k fpp    1.0%           2.295e+05              1.008X
-// present ndv      10k fpp    0.1%           2.258e+05             0.9916X
-// absent  ndv      10k fpp    0.1%           2.283e+05              1.003X
-// present ndv    1000k fpp   10.0%           1.799e+05             0.7901X
-// absent  ndv    1000k fpp   10.0%           1.777e+05             0.7803X
-// present ndv    1000k fpp    1.0%            1.52e+05             0.6674X
-// absent  ndv    1000k fpp    1.0%           1.625e+05             0.7134X
-// present ndv    1000k fpp    0.1%           1.825e+05             0.8013X
-// absent  ndv    1000k fpp    0.1%           1.836e+05              0.806X
-// present ndv  100000k fpp   10.0%           4.125e+04             0.1811X
-// absent  ndv  100000k fpp   10.0%           4.147e+04             0.1821X
-// present ndv  100000k fpp    1.0%           4.203e+04             0.1845X
-// absent  ndv  100000k fpp    1.0%           4.189e+04             0.1839X
-// present ndv  100000k fpp    0.1%           3.506e+04             0.1539X
-// absent  ndv  100000k fpp    0.1%           3.507e+04              0.154X
+// insert:                    Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//            ndv      10k fpp   10.0%           2.03e+05 2.05e+05 2.08e+05    
     1X         1X         1X
+//            ndv      10k fpp    1.0%           2.03e+05 2.06e+05 2.08e+05    
 0.997X         1X         1X
+//            ndv      10k fpp    0.1%           2.03e+05 2.05e+05 2.07e+05    
 0.997X     0.998X     0.997X
+//            ndv    1000k fpp   10.0%           1.82e+05 1.87e+05 1.89e+05    
 0.896X      0.91X     0.907X
+//            ndv    1000k fpp    1.0%           1.49e+05 1.53e+05 1.56e+05    
 0.731X     0.747X      0.75X
+//            ndv    1000k fpp    0.1%           1.79e+05 1.82e+05 1.83e+05    
 0.881X     0.886X     0.882X
+//            ndv  100000k fpp   10.0%           4.08e+04 4.49e+04 5.44e+04    
 0.201X     0.219X     0.262X
+//            ndv  100000k fpp    1.0%           3.94e+04  4.4e+04 5.04e+04    
 0.194X     0.214X     0.242X
+//            ndv  100000k fpp    0.1%           4.08e+04 4.48e+04 5.68e+04    
 0.201X     0.218X     0.273X
 //
+// find:                      Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//    present ndv      10k fpp   10.0%           2.48e+05 2.51e+05 2.53e+05    
     1X         1X         1X
+//    absent  ndv      10k fpp   10.0%           2.47e+05 2.52e+05 2.55e+05    
 0.995X         1X      1.01X
+//    present ndv      10k fpp    1.0%           2.49e+05 2.52e+05 2.55e+05    
     1X      1.01X      1.01X
+//    absent  ndv      10k fpp    1.0%           2.47e+05 2.53e+05 2.56e+05    
 0.997X      1.01X      1.01X
+//    present ndv      10k fpp    0.1%           2.49e+05 2.53e+05 2.54e+05    
     1X      1.01X      1.01X
+//    absent  ndv      10k fpp    0.1%           2.47e+05 2.53e+05 2.56e+05    
 0.997X      1.01X      1.01X
+//    present ndv    1000k fpp   10.0%           1.98e+05 2.04e+05 2.06e+05    
   0.8X     0.814X     0.812X
+//    absent  ndv    1000k fpp   10.0%           2.01e+05 2.07e+05  2.1e+05    
 0.808X     0.826X     0.829X
+//    present ndv    1000k fpp    1.0%           1.83e+05 1.95e+05 2.02e+05    
 0.737X      0.78X     0.798X
+//    absent  ndv    1000k fpp    1.0%           2.01e+05 2.04e+05 2.08e+05    
 0.808X     0.815X      0.82X
+//    present ndv    1000k fpp    0.1%           1.96e+05 2.01e+05 2.03e+05    
 0.788X       0.8X     0.801X
+//    absent  ndv    1000k fpp    0.1%              2e+05 2.05e+05 2.07e+05    
 0.808X     0.817X     0.818X
+//    present ndv  100000k fpp   10.0%            4.6e+04 5.09e+04 6.08e+04    
 0.185X     0.203X      0.24X
+//    absent  ndv  100000k fpp   10.0%           4.11e+04 4.36e+04 4.53e+04    
 0.166X     0.174X     0.179X
+//    present ndv  100000k fpp    1.0%           4.55e+04 4.96e+04 6.19e+04    
 0.184X     0.198X     0.245X
+//    absent  ndv  100000k fpp    1.0%           3.83e+04 4.15e+04 4.69e+04    
 0.154X     0.166X     0.186X
+//    present ndv  100000k fpp    0.1%           4.73e+04 5.43e+04 6.58e+04    
 0.191X     0.217X      0.26X
+//    absent  ndv  100000k fpp    0.1%           3.77e+04 4.07e+04 4.37e+04    
 0.152X     0.163X     0.173X
 //
 // Without AVX2:
 //
-// initialize:           Function     Rate (iters/ms)          Comparison
-// ----------------------------------------------------------------------
-//       ndv      10k fpp   10.0%                6453                  1X
-//       ndv      10k fpp    1.0%                3271             0.5068X
-//       ndv      10k fpp    0.1%                1280             0.1984X
-//       ndv    1000k fpp   10.0%               5.213          0.0008078X
-//       ndv    1000k fpp    1.0%               2.574          0.0003989X
-//       ndv    1000k fpp    0.1%               2.584          0.0004005X
-//       ndv  100000k fpp   10.0%             0.03276          5.076e-06X
-//       ndv  100000k fpp    1.0%             0.03224          4.996e-06X
-//       ndv  100000k fpp    0.1%              0.0161          2.494e-06X
+// insert:                    Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//            ndv      10k fpp   10.0%           1.25e+05 1.27e+05 1.28e+05    
     1X         1X         1X
+//            ndv      10k fpp    1.0%           1.27e+05 1.29e+05  1.3e+05    
  1.01X      1.02X      1.02X
+//            ndv      10k fpp    0.1%           1.26e+05 1.28e+05  1.3e+05    
     1X      1.01X      1.01X
+//            ndv    1000k fpp   10.0%           1.23e+05 1.25e+05 1.26e+05    
 0.977X     0.981X     0.985X
+//            ndv    1000k fpp    1.0%           1.16e+05 1.22e+05 1.23e+05    
 0.925X     0.958X     0.958X
+//            ndv    1000k fpp    0.1%           1.16e+05 1.22e+05 1.23e+05    
 0.928X     0.958X     0.957X
+//            ndv  100000k fpp   10.0%           3.77e+04 4.06e+04 5.62e+04    
 0.301X     0.319X     0.438X
+//            ndv  100000k fpp    1.0%           3.71e+04 4.06e+04 5.45e+04    
 0.296X      0.32X     0.425X
+//            ndv  100000k fpp    0.1%           3.37e+04 3.68e+04 5.15e+04    
 0.269X      0.29X     0.401X
 //
-// insert:               Function     Rate (iters/ms)          Comparison
-// ----------------------------------------------------------------------
-//       ndv      10k fpp   10.0%           1.128e+05                  1X
-//       ndv      10k fpp    1.0%           1.162e+05               1.03X
-//       ndv      10k fpp    0.1%           1.145e+05              1.015X
-//       ndv    1000k fpp   10.0%           1.086e+05             0.9626X
-//       ndv    1000k fpp    1.0%           8.377e+04             0.7427X
-//       ndv    1000k fpp    0.1%           8.902e+04             0.7892X
-//       ndv  100000k fpp   10.0%           2.548e+04             0.2259X
-//       ndv  100000k fpp    1.0%            2.37e+04             0.2101X
-//       ndv  100000k fpp    0.1%           2.256e+04                0.2X
-//
-// find:                 Function     Rate (iters/ms)          Comparison
-// ----------------------------------------------------------------------
-// present ndv      10k fpp   10.0%           1.676e+05                  1X
-// absent  ndv      10k fpp   10.0%           1.067e+05             0.6366X
-// present ndv      10k fpp    1.0%           1.683e+05              1.004X
-// absent  ndv      10k fpp    1.0%           1.291e+05             0.7705X
-// present ndv      10k fpp    0.1%           1.662e+05             0.9917X
-// absent  ndv      10k fpp    0.1%           2.238e+05              1.336X
-// present ndv    1000k fpp   10.0%           1.231e+05             0.7344X
-// absent  ndv    1000k fpp   10.0%           6.903e+04             0.4119X
-// present ndv    1000k fpp    1.0%           1.215e+05              0.725X
-// absent  ndv    1000k fpp    1.0%           1.124e+05             0.6707X
-// present ndv    1000k fpp    0.1%           1.095e+05             0.6532X
-// absent  ndv    1000k fpp    0.1%           1.034e+05             0.6171X
-// present ndv  100000k fpp   10.0%           2.733e+04             0.1631X
-// absent  ndv  100000k fpp   10.0%           3.447e+04             0.2057X
-// present ndv  100000k fpp    1.0%           2.779e+04             0.1658X
-// absent  ndv  100000k fpp    1.0%            3.36e+04             0.2005X
-// present ndv  100000k fpp    0.1%           2.725e+04             0.1626X
-// absent  ndv  100000k fpp    0.1%           4.342e+04             0.2591X
+// find:                      Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//    present ndv      10k fpp   10.0%            1.6e+05 1.64e+05 1.66e+05    
     1X         1X         1X
+//    absent  ndv      10k fpp   10.0%           1.11e+05 1.14e+05 1.15e+05    
 0.696X     0.697X     0.695X
+//    present ndv      10k fpp    1.0%           1.57e+05 1.63e+05 1.64e+05    
 0.982X     0.994X     0.989X
+//    absent  ndv      10k fpp    1.0%            1.3e+05 1.33e+05 1.35e+05    
 0.814X     0.813X     0.812X
+//    present ndv      10k fpp    0.1%           1.55e+05 1.58e+05 1.61e+05    
 0.967X     0.968X     0.969X
+//    absent  ndv      10k fpp    0.1%           2.26e+05 2.29e+05 2.31e+05    
  1.41X       1.4X       1.4X
+//    present ndv    1000k fpp   10.0%           1.21e+05 1.23e+05 1.25e+05    
 0.758X     0.753X     0.756X
+//    absent  ndv    1000k fpp   10.0%            7.6e+04 7.72e+04 7.81e+04    
 0.475X     0.472X     0.471X
+//    present ndv    1000k fpp    1.0%           1.23e+05 1.27e+05 1.28e+05    
 0.771X     0.773X      0.77X
+//    absent  ndv    1000k fpp    1.0%           1.19e+05 1.21e+05 1.22e+05    
 0.744X     0.739X     0.738X
+//    present ndv    1000k fpp    0.1%           1.17e+05 1.18e+05  1.2e+05    
 0.731X     0.724X     0.723X
+//    absent  ndv    1000k fpp    0.1%           1.13e+05 1.16e+05 1.17e+05    
 0.707X     0.706X     0.705X
+//    present ndv  100000k fpp   10.0%           3.42e+04 3.63e+04  3.9e+04    
 0.214X     0.222X     0.235X
+//    absent  ndv  100000k fpp   10.0%            3.6e+04 3.77e+04 3.82e+04    
 0.225X      0.23X      0.23X
+//    present ndv  100000k fpp    1.0%           3.18e+04 3.42e+04 3.57e+04    
 0.199X     0.209X     0.216X
+//    absent  ndv  100000k fpp    1.0%           3.63e+04 3.73e+04 3.79e+04    
 0.227X     0.228X     0.229X
+//    present ndv  100000k fpp    0.1%           2.89e+04  3.2e+04 3.33e+04    
  0.18X     0.196X     0.201X
+//    absent  ndv  100000k fpp    0.1%           4.56e+04 4.78e+04 4.86e+04    
 0.285X     0.292X     0.293X
 
 // Make a random uint32_t, avoiding the absent high bit and the low-entropy 
low bits
 // produced by rand().
@@ -149,7 +140,7 @@ uint32_t MakeRand() {
 namespace initialize {
 
 void Benchmark(int batch_size, void* data) {
-  int * d = reinterpret_cast<int*>(data);
+  int* d = reinterpret_cast<int*>(data);
   for (int i = 0; i < batch_size; ++i) {
     BloomFilter bf(*d);
   }
@@ -157,6 +148,7 @@ void Benchmark(int batch_size, void* data) {
 
 }  // namespace initialize
 
+
 // Benchmark insert
 namespace insert {
 
@@ -231,25 +223,13 @@ void RunBenchmarks() {
   char name[120];
 
   {
-    Benchmark suite("initialize");
-    for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
-      for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
-        int* d = new int(BloomFilter::MinLogSpace(ndv, fpp));
-        snprintf(name, sizeof(name), "ndv %7dk fpp %6.1f%%", ndv/1000, 
fpp*100);
-        suite.AddBenchmark(name, initialize::Benchmark, d);
-      }
-    }
-    cout << suite.Measure() << endl;
-  }
-
-  {
     Benchmark suite("insert");
+    vector<unique_ptr<insert::TestData> > testdata;
     for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
       for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
-        insert::TestData* d =
-            new insert::TestData(BloomFilter::MinLogSpace(ndv, fpp));
+        testdata.emplace_back(new 
insert::TestData(BloomFilter::MinLogSpace(ndv, fpp)));
         snprintf(name, sizeof(name), "ndv %7dk fpp %6.1f%%", ndv/1000, 
fpp*100);
-        suite.AddBenchmark(name, insert::Benchmark, d);
+        suite.AddBenchmark(name, insert::Benchmark, testdata.back().get());
       }
     }
     cout << suite.Measure() << endl;
@@ -257,15 +237,16 @@ void RunBenchmarks() {
 
   {
     Benchmark suite("find");
+    vector<unique_ptr<find::TestData> > testdata;
     for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
       for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
-        find::TestData* d = new find::TestData(BloomFilter::MinLogSpace(ndv, 
fpp), ndv);
-
+        testdata.emplace_back(
+            new find::TestData(BloomFilter::MinLogSpace(ndv, fpp), ndv));
         snprintf(name, sizeof(name), "present ndv %7dk fpp %6.1f%%", ndv/1000, 
fpp*100);
-        suite.AddBenchmark(name, find::Present, d);
+        suite.AddBenchmark(name, find::Present, testdata.back().get());
 
         snprintf(name, sizeof(name), "absent  ndv %7dk fpp %6.1f%%", ndv/1000, 
fpp*100);
-        suite.AddBenchmark(name, find::Absent, d);
+        suite.AddBenchmark(name, find::Absent, testdata.back().get());
       }
     }
     cout << suite.Measure() << endl;
@@ -274,8 +255,24 @@ void RunBenchmarks() {
 
 int main(int argc, char **argv) {
   CpuInfo::Init();
-  cout << endl << Benchmark::GetMachineInfo() << endl << endl
-       << "With AVX2:" << endl << endl;
+
+  cout << endl << Benchmark::GetMachineInfo() << endl << endl;
+
+  {
+    char name[120];
+    Benchmark suite("initialize");
+    vector<unique_ptr<int> > testdata;
+    for (int ndv = 10000; ndv <= 100 * 1000 * 1000; ndv *= 100) {
+      for (double fpp = 0.1; fpp >= 0.001; fpp /= 10) {
+        testdata.emplace_back(new int(BloomFilter::MinLogSpace(ndv, fpp)));
+        snprintf(name, sizeof(name), "ndv %7dk fpp %6.1f%%", ndv / 1000, fpp * 
100);
+        suite.AddBenchmark(name, initialize::Benchmark, testdata.back().get());
+      }
+    }
+    cout << suite.Measure() << endl;
+  }
+
+  cout << "With AVX2:" << endl << endl;
   RunBenchmarks();
   cout << endl << "Without AVX2:" << endl << endl;
   CpuInfo::TempDisable t(CpuInfo::AVX2);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/476f687b/be/src/benchmarks/string-compare-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/string-compare-benchmark.cc 
b/be/src/benchmarks/string-compare-benchmark.cc
index 9ac2857..a51b907 100644
--- a/be/src/benchmarks/string-compare-benchmark.cc
+++ b/be/src/benchmarks/string-compare-benchmark.cc
@@ -24,6 +24,29 @@
 
 using namespace impala;
 
+// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
+//
+// Long strings (10000):      Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//                            Original                 85     86.5     86.5    
     1X         1X         1X
+//                  Simplified, broken               76.6       78       78    
 0.901X     0.901X     0.901X
+//                   Simplified, fixed               95.8     97.5     97.5    
  1.13X      1.13X      1.13X
+//
+// Med strings (100):         Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//                            Original           6.55e+03 6.66e+03 6.74e+03    
     1X         1X         1X
+//                  Simplified, broken           6.25e+03 6.32e+03 6.38e+03    
 0.955X     0.949X     0.947X
+//                   Simplified, fixed           7.38e+03 7.49e+03 7.55e+03    
  1.13X      1.12X      1.12X
+//
+// Short strings (10):        Function  iters/ms   10%ile   50%ile   90%ile    
 10%ile     50%ile     90%ile
+//                                                                          
(relative) (relative) (relative)
+// 
---------------------------------------------------------------------------------------------------------
+//                            Original           1.59e+04 1.62e+04 1.63e+04    
     1X         1X         1X
+//                  Simplified, broken            2.8e+04 2.85e+04 2.87e+04    
  1.76X      1.76X      1.76X
+//                   Simplified, fixed           2.92e+04 2.96e+04 2.99e+04    
  1.83X      1.83X      1.84X
+
 // Original
 int StringCompare1(const char* s1, int n1, const char* s2, int n2, int len) {
   DCHECK_EQ(len, std::min(n1, n2));
@@ -178,27 +201,28 @@ TestData InitTestData(int len) {
 
 int main(int argc, char **argv) {
   CpuInfo::Init();
+  cout << Benchmark::GetMachineInfo() << endl << endl;
 
   Benchmark long_suite("Long strings (10000)");
   TestData long_data = InitTestData(10000);
   long_suite.AddBenchmark("Original", TestStringCompare1, &long_data);
   long_suite.AddBenchmark("Simplified, broken", TestStringCompare2, 
&long_data);
   long_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, &long_data);
-  cout << long_suite.Measure();
+  cout << long_suite.Measure() << endl;
 
   Benchmark med_suite("Med strings (100)");
   TestData med_data = InitTestData(100);
   med_suite.AddBenchmark("Original", TestStringCompare1, &med_data);
   med_suite.AddBenchmark("Simplified, broken", TestStringCompare2, &med_data);
   med_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, &med_data);
-  cout << med_suite.Measure();
+  cout << med_suite.Measure() << endl;
 
   Benchmark short_suite("Short strings (10)");
   TestData short_data = InitTestData(10);
   short_suite.AddBenchmark("Original", TestStringCompare1, &short_data);
   short_suite.AddBenchmark("Simplified, broken", TestStringCompare2, 
&short_data);
   short_suite.AddBenchmark("Simplified, fixed", TestStringCompare3, 
&short_data);
-  cout << short_suite.Measure();
+  cout << short_suite.Measure() << endl;
 
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/476f687b/be/src/util/benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/util/benchmark.cc b/be/src/util/benchmark.cc
index 8ca2816..f0bc055 100644
--- a/be/src/util/benchmark.cc
+++ b/be/src/util/benchmark.cc
@@ -86,21 +86,48 @@ string Benchmark::Measure() {
   // Run a warmup to iterate through the data
   benchmarks_[0].fn(10, benchmarks_[0].args);
 
+  // The number of times a benchmark is repeated
+  const int NUM_REPS = 60;
+  // Which percentiles of the benchmark to report. Reports the LO_PERCENT, 
MID_PERCENT,
+  // and HI_PERCENT percentile result.
+  const int LO_PERCENT = 10;
+  const int MID_PERCENT = 50;
+  const int HI_PERCENT = 100 - LO_PERCENT;
+  const size_t LO_IDX =
+      floor(((LO_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5);
+  const size_t MID_IDX =
+      floor(((MID_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5);
+  const size_t HI_IDX =
+      floor(((HI_PERCENT / 100.0) * static_cast<double>(NUM_REPS)) - 0.5);
+
+  const int function_out_width = 35;
+  const int rate_out_width = 10;
+  const int percentile_out_width = 9;
+  const int comparison_out_width = 11;
+  const int padding = 0;
+  const int total_width = function_out_width + rate_out_width + 3 * 
comparison_out_width +
+      3 * percentile_out_width + padding;
+
   stringstream ss;
-  for (int i = 0; i < benchmarks_.size(); ++i) {
-    benchmarks_[i].rate = Measure(benchmarks_[i].fn, benchmarks_[i].args);
+  for (int j = 0; j < NUM_REPS; ++j) {
+    for (int i = 0; i < benchmarks_.size(); ++i) {
+      benchmarks_[i].rates.push_back(Measure(benchmarks_[i].fn, 
benchmarks_[i].args));
+    }
   }
 
-  int function_out_width = 30;
-  int rate_out_width = 20;
-  int comparison_out_width = 20;
-  int padding = 0;
-  int total_width = function_out_width + rate_out_width + comparison_out_width 
+ padding;
-
   ss << name_ << ":"
      << setw(function_out_width - name_.size() - 1) << "Function"
-     << setw(rate_out_width) << "Rate (iters/ms)"
-     << setw(comparison_out_width) << "Comparison" << endl;
+     << setw(rate_out_width) << "iters/ms"
+     << setw(percentile_out_width -4) << LO_PERCENT << "%ile"
+     << setw(percentile_out_width - 4) << MID_PERCENT << "%ile"
+     << setw(percentile_out_width - 4) << HI_PERCENT << "%ile"
+     << setw(comparison_out_width - 4) << LO_PERCENT << "%ile"
+     << setw(comparison_out_width - 4) << MID_PERCENT << "%ile"
+     << setw(comparison_out_width - 4) << HI_PERCENT << "%ile" << endl;
+  ss << setw(function_out_width + rate_out_width + 3 * percentile_out_width +
+            comparison_out_width) << "(relative)"
+     << setw(comparison_out_width) << "(relative)"
+     << setw(comparison_out_width) << "(relative)" << endl;
   for (int i = 0; i < total_width; ++i) {
     ss << '-';
   }
@@ -108,12 +135,24 @@ string Benchmark::Measure() {
 
   int previous_baseline_idx = -1;
   for (int i = 0; i < benchmarks_.size(); ++i) {
-    double base_line = benchmarks_[benchmarks_[i].baseline_idx].rate;
+    sort(benchmarks_[i].rates.begin(), benchmarks_[i].rates.end());
+    const double base_line_lo = 
benchmarks_[benchmarks_[i].baseline_idx].rates[LO_IDX];
+    const double base_line_mid = 
benchmarks_[benchmarks_[i].baseline_idx].rates[MID_IDX];
+    const double base_line_hi = 
benchmarks_[benchmarks_[i].baseline_idx].rates[HI_IDX];
     if (previous_baseline_idx != benchmarks_[i].baseline_idx && i > 0) ss << 
endl;
     ss << setw(function_out_width) << benchmarks_[i].name
-       << setw(rate_out_width) << setprecision(4) << benchmarks_[i].rate
-       << setw(comparison_out_width - 1) << setprecision(4)
-       << (benchmarks_[i].rate / base_line) << "X" << endl;
+       << setw(rate_out_width + percentile_out_width) << setprecision(3)
+           << benchmarks_[i].rates[LO_IDX]
+       << setw(percentile_out_width) << setprecision(3)
+           << benchmarks_[i].rates[MID_IDX]
+       << setw(percentile_out_width) << setprecision(3)
+           << benchmarks_[i].rates[HI_IDX]
+       << setw(comparison_out_width - 1) << setprecision(3)
+       << (benchmarks_[i].rates[LO_IDX] / base_line_lo) << "X"
+       << setw(comparison_out_width - 1) << setprecision(3)
+       << (benchmarks_[i].rates[MID_IDX] / base_line_mid) << "X"
+       << setw(comparison_out_width - 1) << setprecision(3)
+       << (benchmarks_[i].rates[HI_IDX] / base_line_hi) << "X" << endl;
     previous_baseline_idx = benchmarks_[i].baseline_idx;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/476f687b/be/src/util/benchmark.h
----------------------------------------------------------------------
diff --git a/be/src/util/benchmark.h b/be/src/util/benchmark.h
index 806cff8..e1d476b 100644
--- a/be/src/util/benchmark.h
+++ b/be/src/util/benchmark.h
@@ -62,13 +62,13 @@ class Benchmark {
   /// harness function will automatically ramp up the batch_size.  The 
benchmark
   /// will take *at least* initial_batch_size * function invocation time.
   static double Measure(BenchmarkFunction function, void* args,
-      int max_time = 1000, int initial_batch_size = 1000);
+      int max_time = 50, int initial_batch_size = 10);
 
   struct BenchmarkResult {
     std::string name;
     BenchmarkFunction fn;
     void* args;
-    double rate;
+    std::vector<double> rates;
     int baseline_idx;
   };
 

Reply via email to