This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new bb2cec1941 [Fix,MetaSchedule] Correct log usage in arithmetic intensity feature (#12806)
bb2cec1941 is described below
commit bb2cec1941db6f5a67b85550faa9279c4c14e2a3
Author: Tristan Konolige <[email protected]>
AuthorDate: Thu Oct 6 17:05:05 2022 -0700
[Fix,MetaSchedule] Correct log usage in arithmetic intensity feature
(#12806)
In meta schedule's featurization, arithmetic intensity was incorrectly
calculated as log(FLOPs) / log(bytes). This change corrects it to
log(FLOPs/bytes). Note that this is the same issue as in #12079.
---
.../feature_extractor/per_store_feature.cc | 21 +++++++---
...schedule_feature_extractor_per_store_feature.py | 46 ++++++++++++----------
2 files changed, 42 insertions(+), 25 deletions(-)
diff --git a/src/meta_schedule/feature_extractor/per_store_feature.cc
b/src/meta_schedule/feature_extractor/per_store_feature.cc
index 698de010b7..422f21abe1 100644
--- a/src/meta_schedule/feature_extractor/per_store_feature.cc
+++ b/src/meta_schedule/feature_extractor/per_store_feature.cc
@@ -1042,6 +1042,17 @@ struct Feature {
/*!
* \brief See the wiki page [1] for details
*
+ * Arithmetic intensity is FLOPs/unique bytes of memory touched. A value is computed
+ * for each set of loop nests starting with just the innermost loop and
+ * reaching to include all loops. There are a variable number of loops, so
+ * n_samples are taken from the curve of arithmetic intensity vs flops. This
+ * biases the values towards larger loops.
+ *
+ * Note that the denominator is unique bytes of memory touched. Repeated
+ * access to the same byte of memory counts as only a single byte touched.
+ *
+ * Values are scaled by log2(x + 1).
+ *
* [1] https://en.wikipedia.org/wiki/Roofline_model
*/
std::vector<double> arith_intensity_curve;
@@ -1060,7 +1071,7 @@ struct Feature {
std::vector<double> memory_bytes;
memory_bytes.resize(n_loops);
for (int i = 0; i < n_loops; ++i) {
- memory_bytes[n_loops - 1 - i] = std::log2(for_touched_bytes[i]);
+ memory_bytes[n_loops - 1 - i] = for_touched_bytes[i];
}
// Calculate `compute_ops` and `cur_compute_ops`
std::vector<double> compute_ops;
@@ -1072,7 +1083,7 @@ struct Feature {
if (const int64_t* extent = GetLoopIntExtent(loops[i])) {
total_compute_ops *= *extent;
}
- compute_ops.push_back(std::log2(total_compute_ops));
+ compute_ops.push_back(total_compute_ops);
}
// Fill the feature set
if (total_compute_ops <= 0 || compute_ops.empty()) {
@@ -1081,7 +1092,7 @@ struct Feature {
}
return;
}
-    total_compute_ops = compute_ops.back();  // i.e. total_compute_ops = log2(total_compute_ops)
+ total_compute_ops = compute_ops.back();
int p = 0;
for (int i = 0; i < n_samples; ++i) {
double& result = arith_intensity_curve[i];
@@ -1094,13 +1105,13 @@ struct Feature {
}
CHECK_LT(p, n_loops);
if (p == 0) {
- result = compute_ops[p] / memory_bytes[p];
+ result = slog(compute_ops[p] / memory_bytes[p]);
} else {
double base = compute_ops[p - 1] / memory_bytes[p - 1];
double slope =
(compute_ops[p] / memory_bytes[p] - compute_ops[p - 1] /
memory_bytes[p - 1]) /
(compute_ops[p] - compute_ops[p - 1]);
- result = base + slope * (cur_compute_ops - compute_ops[p - 1]);
+ result = slog(base + slope * (cur_compute_ops - compute_ops[p - 1]));
}
}
}
diff --git
a/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py
b/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py
index cad140b8de..701e1826b3 100644
---
a/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py
+++
b/tests/python/unittest/test_meta_schedule_feature_extractor_per_store_feature.py
@@ -365,19 +365,22 @@ def test_cpu_matmul():
atol=1e-5,
)
# Group 3: Arithmetic intensity
+    # arithmetic intensity = flops/bytes touched = 2*512*512*512/(3 * 4 * 512*512)
+    #                 add and multiply ^            3 arrays ^     ^ 4 bytes per f32
+    # = 85.3 but log2 is used so values should be around 6.4
assert_allclose(
actual=f[147:157],
desired=[
- 0.7097842693328857,
- 0.7408391237258911,
- 0.8750449419021606,
- 0.9449487924575806,
- 1.0148526430130005,
- 1.0847564935684204,
- 1.113688349723816,
- 1.1394684314727783,
- 1.2119636535644531,
- 1.2971993684768677,
+ 3.812599,
+ 4.464822,
+ 4.912349,
+ 5.253426,
+ 5.529086,
+ 5.76043,
+ 5.959752,
+ 6.134849,
+ 6.290977,
+ 6.431846,
],
rtol=1e-5,
atol=1e-5,
@@ -1357,19 +1360,22 @@ def test_gpu():
atol=1e-5,
)
# Group 3: Arithmetic intensity
+ # Arithmetic intensity is high here because of repeated use of a shared
+ # buffer. Multiple accesses to the same memory location are counted as a
+ # single byte, skewing these numbers towards higher intensity.
assert_allclose(
actual=f[147:157],
desired=[
- 0.7097842504665767,
- 0.7548801745187567,
- 0.8775907547541741,
- 0.9957389916154509,
- 1.2446737395193135,
- 1.493608487423176,
- 1.7093103019954263,
- 1.8031580276850985,
- 1.9841832691827785,
- 2.204648076869754,
+ 11.98533,
+ 12.977811,
+ 13.562714,
+ 13.977722,
+ 14.299632,
+ 14.562654,
+ 14.785038,
+ 14.977677,
+ 15.147597,
+ 15.299596,
],
rtol=1e-5,
atol=1e-5,