timestamp recored by the MI_REPORT_PERF_COUNT at the end of the
performance query.
v2: fix uninitialized offset variable to 0 (Lionel)
v3: rework the reading to avoid blocking the user of the API unless
requested (Rob)
v4: fix a bug that makes the i965 driver reading the perf stream when
not necessary, leading to very long counter accumulation times
(Lionel)
Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Cc: Robert Bragg <rob...@sixbynine.org>
---
src/mesa/drivers/dri/i965/brw_performance_query.c | 133 ++++++++++++++++++----
1 file changed, 113 insertions(+), 20 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index d10141bf07a..d11784c0352 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -219,6 +219,7 @@ struct brw_oa_sample_buf {
int refcount;
int len;
uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10];
+ uint32_t last_timestamp;
};
/**
@@ -244,6 +245,11 @@ struct brw_perf_query_object
struct brw_bo *bo;
/**
+ * Address of mapped of @bo
+ */
+ void *map;
+
+ /**
* The MI_REPORT_PERF_COUNT command lets us specify a unique
* ID that will be reflected in the resulting OA report
* that's written by the GPU. This is the ID we're expecting
@@ -712,11 +718,26 @@ discard_all_queries(struct brw_context *brw)
}
}
-static bool
-read_oa_samples(struct brw_context *brw)
+enum OaReadStatus {
+ OA_READ_STATUS_ERROR,
+ OA_READ_STATUS_UNFINISHED,
+ OA_READ_STATUS_FINISHED,
+};
+
+static enum OaReadStatus
+read_oa_samples_until(struct brw_context *brw,
+ uint32_t start_timestamp,
+ uint32_t end_timestamp)
{
+ struct exec_node *tail_node =
+ exec_list_get_tail(&brw->perfquery.sample_buffers);
+ struct brw_oa_sample_buf *tail_buf =
+ exec_node_data(struct brw_oa_sample_buf, tail_node, link);
+ uint32_t last_timestamp = tail_buf->last_timestamp;
+
while (1) {
struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
+ uint32_t offset;
int len;
while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
@@ -728,28 +749,94 @@ read_oa_samples(struct brw_context *brw)
if (len < 0) {
if (errno == EAGAIN)
- return true;
+ return ((last_timestamp - start_timestamp) >=
+ (end_timestamp - start_timestamp)) ?
+ OA_READ_STATUS_FINISHED :
+ OA_READ_STATUS_UNFINISHED;
else {
DBG("Error reading i915 perf samples: %m\n");
- return false;
}
- } else {
+ } else
DBG("Spurious EOF reading i915 perf samples\n");
- return false;
- }
+
+ return OA_READ_STATUS_ERROR;
}
buf->len = len;
exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
+
+ /* Go through the reports and update the last timestamp. */
+ offset = 0;
+ while (offset < buf->len) {
+ const struct drm_i915_perf_record_header *header =
+ (const struct drm_i915_perf_record_header *) &buf->buf[offset];
+ uint32_t *report = (uint32_t *) (header + 1);
+
+ if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
+ last_timestamp = report[1];
+
+ offset += header->size;
+ }
+
+ buf->last_timestamp = last_timestamp;
}
unreachable("not reached");
+ return OA_READ_STATUS_ERROR;
+}
+
+/**
+ * Try to read all the reports until either the delimiting timestamp
+ * or an error arises.
+ */
+static bool
+read_oa_samples_for_query(struct brw_context *brw,
+ struct brw_perf_query_object *obj)
+{
+ uint32_t *start;
+ uint32_t *last;
+ uint32_t *end;
+
+ /* We need the MI_REPORT_PERF_COUNT to land before we can start
+ * accumulate. */