Re: [Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 03:00:43PM +0530, sourab.gu...@intel.com wrote:
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 06c7b55..0dc2384 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1088,6 +1088,8 @@ static int i915_driver_init_hw(struct drm_i915_private 
> *dev_priv)
>   DRM_DEBUG_DRIVER("can't enable MSI");
>   }
>  
> + i915_perf_init_late(dev_priv);
> +
>   return 0;

Just a quick one:

Create i915_driver_init_late() to capture the new init phase you want to add.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

Currently, we have the ability to only forward the GPU timestamps in the
samples (which are generated via OA reports or PIPE_CONTROL commands
inserted in the ring). This limits the ability to correlate these samples
with the system events. If we scale the GPU timestamps according to the
timestamp base/frequency info present in bspec, it is observed that the
timestamps drift really quickly from the system time.

An ability is therefore needed to report timestamps in different clock
domains, such as CLOCK_MONOTONIC (or _MONO_RAW), in the perf samples to
be of more practical use to the userspace. This ability becomes important
when we want to correlate/plot GPU events/samples with other system events
on the same timeline (e.g. vblank events, or timestamps when work was
submitted to kernel, etc.)

The patch here proposes a mechanism to achieve this. The correlation between
gpu time and system time is established using the cross timestamp framework.
For this purpose, the timestamp clock associated with the command stream is
abstracted as a timecounter/cyclecounter, before utilizing the cross timestamp
framework to retrieve correlated gpu/system time values.
Different such gpu/system time values are then used to detect and correct
the error in published gpu timestamp clock frequency. The userspace can
request CLOCK_MONOTONIC_RAW timestamps in samples by requesting the
corresponding property while opening the stream.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.c  |   2 +
 drivers/gpu/drm/i915/i915_drv.h  |  24 +++-
 drivers/gpu/drm/i915/i915_perf.c | 273 +++
 include/uapi/drm/i915_drm.h  |   9 +-
 4 files changed, 284 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 06c7b55..0dc2384 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1088,6 +1088,8 @@ static int i915_driver_init_hw(struct drm_i915_private 
*dev_priv)
DRM_DEBUG_DRIVER("can't enable MSI");
}
 
+   i915_perf_init_late(dev_priv);
+
return 0;
 
 out_ggtt:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e912679..557a124 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -42,6 +42,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -1843,6 +1846,9 @@ struct i915_perf_stream {
/* Whether the OA unit is in use */
bool using_oa;
 
+   /* monotonic_raw clk timestamp (in ns) for last sample */
+   u64 last_sample_ts;
+
const struct i915_perf_stream_ops *ops;
 };
 
@@ -1889,6 +1895,20 @@ struct i915_perf_cs_data_node {
u32 tag;
 };
 
+/**
+ * struct i915_clock_info - describes i915 timestamp clock
+ *
+ */
+struct i915_clock_info {
+   struct cyclecounter cc;
+   struct timecounter tc;
+   struct system_device_crosststamp xtstamp;
+   ktime_t clk_offset; /* Offset (in ns) between monoraw clk and gpu time 
*/
+   u32 timestamp_frequency;
+   u32 resync_period; /* in msecs */
+   struct delayed_work clk_sync_work;
+};
+
 struct drm_i915_private {
struct drm_device drm;
 
@@ -2189,6 +2209,8 @@ struct drm_i915_private {
 
struct i915_runtime_pm pm;
 
+   struct i915_clock_info ts_clk_info;
+
struct {
bool initialized;
 
@@ -2213,7 +2235,6 @@ struct drm_i915_private {
 
bool periodic;
int period_exponent;
-   int timestamp_frequency;
 
int tail_margin;
 
@@ -3796,6 +3817,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
 
 /* i915_perf.c */
 extern void i915_perf_init(struct drm_i915_private *dev_priv);
+extern void i915_perf_init_late(struct drm_i915_private *dev_priv);
 extern void i915_perf_fini(struct drm_i915_private *dev_priv);
 extern void i915_perf_register(struct drm_i915_private *dev_priv);
 extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 8eb80e8..b11e953 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -189,6 +189,7 @@
 
 #include 
 #include 
+#include 
 
 #include "i915_drv.h"
 #include "intel_ringbuffer.h"
@@ -228,6 +229,9 @@
 #define POLL_FREQUENCY 200
 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
 
+#define MAX_CLK_SYNC_PERIOD (60*MSEC_PER_SEC)
+#define INIT_CLK_SYNC_PERIOD (20) /* in msecs */
+
 static u32 i915_perf_stream_paranoid = true;
 
 /* The maximum exponent the hardware accepts is 63 (essentially it selects one
@@ -254,13 +258,24 @@ static u32 i915_perf_stream_paranoid = true;
 #define TS_ADDR_ALIGN 8
 #define I915_PERF_TS_SAMPLE_SIZE 8
 
+/* Published frequency of GT command stream timestamp 

[Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples

2016-06-01 Thread sourab . gupta
From: Sourab Gupta 

Currently, we have the ability to only forward the GPU timestamps in the
samples (which are generated via OA reports or PIPE_CONTROL commands
inserted in the ring). This limits the ability to correlate these samples
with the system events. If we scale the GPU timestamps according to the
timestamp base/frequency info present in bspec, it is observed that the
timestamps drift really quickly from the system time.

An ability is therefore needed to report timestamps in different clock
domains, such as CLOCK_MONOTONIC (or _MONO_RAW), in the perf samples to
be of more practical use to the userspace. This ability becomes important
when we want to correlate/plot GPU events/samples with other system events
on the same timeline (e.g. vblank events, or timestamps when work was
submitted to kernel, etc.)

The patch here proposes a mechanism to achieve this. The correlation between
gpu time and system time is established using the cross timestamp framework.
For this purpose, the timestamp clock associated with the command stream is
abstracted as a timecounter/cyclecounter, before utilizing the cross timestamp
framework to retrieve correlated gpu/system time values.
Different such gpu/system time values are then used to detect and correct
the error in published gpu timestamp clock frequency. The userspace can
request CLOCK_MONOTONIC_RAW timestamps in samples by requesting the
corresponding property while opening the stream.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_dma.c  |   2 +
 drivers/gpu/drm/i915/i915_drv.h  |  24 +++-
 drivers/gpu/drm/i915/i915_perf.c | 273 +++
 include/uapi/drm/i915_drm.h  |   9 +-
 4 files changed, 284 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index ab1f6c4..01f3559 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1327,6 +1327,8 @@ static int i915_driver_init_hw(struct drm_i915_private 
*dev_priv)
DRM_DEBUG_DRIVER("can't enable MSI");
}
 
+   i915_perf_init_late(dev_priv);
+
return 0;
 
 out_ggtt:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9ccac83..d99ea73 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -42,6 +42,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -1825,6 +1828,9 @@ struct i915_perf_stream {
/* Whether the OA unit is in use */
bool using_oa;
 
+   /* monotonic_raw clk timestamp (in ns) for last sample */
+   u64 last_sample_ts;
+
const struct i915_perf_stream_ops *ops;
 };
 
@@ -1869,6 +1875,20 @@ struct i915_perf_cs_data_node {
u32 tag;
 };
 
+/**
+ * struct i915_clock_info - describes i915 timestamp clock
+ *
+ */
+struct i915_clock_info {
+   struct cyclecounter cc;
+   struct timecounter tc;
+   struct system_device_crosststamp xtstamp;
+   ktime_t clk_offset; /* Offset (in ns) between monoraw clk and gpu time 
*/
+   u32 timestamp_frequency;
+   u32 resync_period; /* in msecs */
+   struct delayed_work clk_sync_work;
+};
+
 struct drm_i915_private {
struct drm_device *dev;
struct kmem_cache *objects;
@@ -2147,6 +2167,8 @@ struct drm_i915_private {
 
struct i915_runtime_pm pm;
 
+   struct i915_clock_info ts_clk_info;
+
struct {
bool initialized;
 
@@ -2169,7 +2191,6 @@ struct drm_i915_private {
 
bool periodic;
int period_exponent;
-   int timestamp_frequency;
 
int tail_margin;
 
@@ -3699,6 +3720,7 @@ int i915_parse_cmds(struct intel_engine_cs *engine,
 
 /* i915_perf.c */
 extern void i915_perf_init(struct drm_device *dev);
+extern void i915_perf_init_late(struct drm_i915_private *dev_priv);
 extern void i915_perf_fini(struct drm_device *dev);
 
 /* i915_suspend.c */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index aa3589e..e340cf9f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -23,6 +23,7 @@
 
 #include 
 #include 
+#include 
 
 #include "i915_drv.h"
 #include "intel_ringbuffer.h"
@@ -62,6 +63,9 @@
 #define POLL_FREQUENCY 200
 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
 
+#define MAX_CLK_SYNC_PERIOD (60*MSEC_PER_SEC)
+#define INIT_CLK_SYNC_PERIOD (20) /* in msecs */
+
 static u32 i915_perf_stream_paranoid = true;
 
 /* The maximum exponent the hardware accepts is 63 (essentially it selects one
@@ -88,13 +92,24 @@ static u32 i915_perf_stream_paranoid = true;
 #define TS_ADDR_ALIGN 8
 #define I915_PERF_TS_SAMPLE_SIZE 8
 
+/* Published frequency of GT command stream timestamp clock */
+#define FREQUENCY_12_5_MHZ (1250)
+#define FREQUENCY_12_0_MHZ (1200)
+#define