On 15/06/17 08:51, Kenneth Graunke wrote:
On Wednesday, June 14, 2017 5:38:31 PM PDT Lionel Landwerlin wrote:
Due to an underlying hardware race condition, we have no guarantee
that all the reports coming from the OA buffer related to the workload
we're trying to measure have landed to memory by the time all the work
submitted has completed. That means we need to keep on reading the OA
stream until we read a report with a timestamp older than the
timestamp newer / larger / more recent, right?  i.e. keep going until
you find a report in the stream beyond the expected end?  (Unless we're
reading backwards...)

More recent indeed :)
I meant older in the sense that the number is larger (like I'm older because my age is a bigger number...), but it's all inverted here :)

Fixed, thanks a lot!


timestamp recored by the MI_REPORT_PERF_COUNT at the end of the
performance query.

v2: fix uninitialized offset variable to 0 (Lionel)

v3: rework the reading to avoid blocking the user of the API unless
     requested (Rob)

v4: fix a bug that makes the i965 driver reading the perf stream when
     not necessary, leading to very long counter accumulation times
     (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Cc: Robert Bragg <rob...@sixbynine.org>
---
  src/mesa/drivers/dri/i965/brw_performance_query.c | 133 ++++++++++++++++++----
  1 file changed, 113 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index d10141bf07a..d11784c0352 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -219,6 +219,7 @@ struct brw_oa_sample_buf {
     int refcount;
     int len;
     uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10];
+   uint32_t last_timestamp;
  };
/**
@@ -244,6 +245,11 @@ struct brw_perf_query_object
           struct brw_bo *bo;
/**
+          * Address of mapped of @bo
+          */
+         void *map;
+
+         /**
            * The MI_REPORT_PERF_COUNT command lets us specify a unique
            * ID that will be reflected in the resulting OA report
            * that's written by the GPU. This is the ID we're expecting
@@ -712,11 +718,26 @@ discard_all_queries(struct brw_context *brw)
     }
  }
-static bool
-read_oa_samples(struct brw_context *brw)
+enum OaReadStatus {
+   OA_READ_STATUS_ERROR,
+   OA_READ_STATUS_UNFINISHED,
+   OA_READ_STATUS_FINISHED,
+};
+
+static enum OaReadStatus
+read_oa_samples_until(struct brw_context *brw,
+                      uint32_t start_timestamp,
+                      uint32_t end_timestamp)
  {
+   struct exec_node *tail_node =
+      exec_list_get_tail(&brw->perfquery.sample_buffers);
+   struct brw_oa_sample_buf *tail_buf =
+      exec_node_data(struct brw_oa_sample_buf, tail_node, link);
+   uint32_t last_timestamp = tail_buf->last_timestamp;
+
     while (1) {
        struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
+      uint32_t offset;
        int len;
while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
@@ -728,28 +749,94 @@ read_oa_samples(struct brw_context *brw)
if (len < 0) {
              if (errno == EAGAIN)
-               return true;
+               return ((last_timestamp - start_timestamp) >=
+                       (end_timestamp - start_timestamp)) ?
+                      OA_READ_STATUS_FINISHED :
+                      OA_READ_STATUS_UNFINISHED;
              else {
                 DBG("Error reading i915 perf samples: %m\n");
-               return false;
              }
-         } else {
+         } else
              DBG("Spurious EOF reading i915 perf samples\n");
-            return false;
-         }
+
+         return OA_READ_STATUS_ERROR;
        }
buf->len = len;
        exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
+
+      /* Go through the reports and update the last timestamp. */
+      offset = 0;
+      while (offset < buf->len) {
+         const struct drm_i915_perf_record_header *header =
+            (const struct drm_i915_perf_record_header *) &buf->buf[offset];
+         uint32_t *report = (uint32_t *) (header + 1);
+
+         if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
+            last_timestamp = report[1];
+
+         offset += header->size;
+      }
+
+      buf->last_timestamp = last_timestamp;
     }
unreachable("not reached");
+   return OA_READ_STATUS_ERROR;
+}
+
+/**
+ * Try to read all the reports until either the delimiting timestamp
+ * or an error arises.
+ */
+static bool
+read_oa_samples_for_query(struct brw_context *brw,
+                          struct brw_perf_query_object *obj)
+{
+   uint32_t *start;
+   uint32_t *last;
+   uint32_t *end;
+
+   /* We need the MI_REPORT_PERF_COUNT to land before we can start
+    * accumulate. */
    */ goes on its own line (here and elsewhere).

This all looks right to me.

Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to