Re: [Nouveau] [RFC PATCH 6/8] nv50: add support for compute/graphics global performance counters

2015-06-28 Thread Samuel Pitoiset



On 06/26/2015 01:09 AM, Ilia Mirkin wrote:

What's with the \%'s everywhere?


Maybe "percent" would be better?



On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset
samuel.pitoi...@gmail.com wrote:

This commit adds support for both compute and graphics global
performance counters which have been reverse engineered with
CUPTI (Linux) and PerfKit (Windows).

Currently, only one query type can be monitored at a time because
Gallium's HUD is not well suited to this. This will be improved later.

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
  src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1057 +++-
  src/gallium/drivers/nouveau/nv50/nv50_screen.h |   35 +
  2 files changed, 1087 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 1162110..b9d2914 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
  #include "nv50/nv50_context.h"
  #include "nv_object.xml.h"

+#include "nouveau_perfmon.h"
+
  #define NV50_QUERY_STATE_READY   0
  #define NV50_QUERY_STATE_ACTIVE  1
  #define NV50_QUERY_STATE_ENDED   2
@@ -51,10 +53,25 @@ struct nv50_query {
 boolean is64bit;
 struct nouveau_mm_allocation *mm;
 struct nouveau_fence *fence;
+   struct nouveau_object *perfdom;
  };

  #define NV50_QUERY_ALLOC_SPACE 256

+#ifdef DEBUG
+static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args);
+#endif
+
+static boolean
+nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *);
+static void
+nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *);
+static boolean
+nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *);
+static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *);
+static boolean nv50_hw_pm_query_result(struct nv50_context *,
+struct nv50_query *, boolean, void *);
+
  static INLINE struct nv50_query *
  nv50_query(struct pipe_query *pipe)
  {
@@ -96,12 +113,18 @@ nv50_query_allocate(struct nv50_context *nv50, struct 
nv50_query *q, int size)
  static void
  nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
  {
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_query *q = nv50_query(pq);
+
 if (!pq)
return;

-   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
-   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
-   FREE(nv50_query(pq));
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST))
+  nv50_hw_pm_query_destroy(nv50, q);
+
+   nv50_query_allocate(nv50, q, 0);
+   nouveau_fence_ref(NULL, &q->fence);
+   FREE(q);
  }

  static struct pipe_query *
@@ -130,6 +153,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned 
type, unsigned index)
q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
 }

+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  if (!nv50_hw_pm_query_create(nv50, q))
+ return NULL;
+   }
+
 return (struct pipe_query *)q;
  }

@@ -154,6 +182,7 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
 struct nv50_context *nv50 = nv50_context(pipe);
 struct nouveau_pushbuf *push = nv50->base.pushbuf;
 struct nv50_query *q = nv50_query(pq);
+   boolean ret = TRUE;

 if (!pq)
return FALSE;
@@ -211,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
nv50_query_get(push, q, 0x10, 0x5002);
break;
 default:
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ ret = nv50_hw_pm_query_begin(nv50, q);
+  }
break;
 }
 q->state = NV50_QUERY_STATE_ACTIVE;
-   return true;
+   return ret;
  }

  static void
@@ -274,7 +306,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query 
*pq)
q->state = NV50_QUERY_STATE_READY;
break;
 default:
-  assert(0);
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ nv50_hw_pm_query_end(nv50, q);
+  }
break;
 }

@@ -309,6 +343,10 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
 if (!pq)
return FALSE;

+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  return nv50_hw_pm_query_result(nv50, q, wait, result);
+   }
+
 if (q->state != NV50_QUERY_STATE_READY)
nv50_query_update(q);

@@ -488,6 +526,1015 @@ nva0_so_target_save_offset(struct pipe_context *pipe,
 nv50_query_end(pipe, targ->pq);
  }

+/* === HARDWARE GLOBAL PERFORMANCE COUNTERS for NV50 === */
+
+struct nv50_hw_pm_source_cfg
+{
+   const char *name;
+   uint64_t value;
+};
+
+struct nv50_hw_pm_signal_cfg
+{
+   const char *name;
+   const struct nv50_hw_pm_source_cfg src[8];
+};
+
+struct nv50_hw_pm_counter_cfg
+{
+   uint16_t logic_op;
+   const struct 

Re: [Nouveau] [RFC PATCH 6/8] nv50: add support for compute/graphics global performance counters

2015-06-25 Thread Ilia Mirkin
What's with the \%'s everywhere?

On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset
samuel.pitoi...@gmail.com wrote:
 This commit adds support for both compute and graphics global
 performance counters which have been reverse engineered with
 CUPTI (Linux) and PerfKit (Windows).

 Currently, only one query type can be monitored at a time because
 Gallium's HUD is not well suited to this. This will be improved later.

 Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
 ---
  src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1057 
 +++-
  src/gallium/drivers/nouveau/nv50/nv50_screen.h |   35 +
  2 files changed, 1087 insertions(+), 5 deletions(-)

 diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
 b/src/gallium/drivers/nouveau/nv50/nv50_query.c
 index 1162110..b9d2914 100644
 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
 +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
 @@ -27,6 +27,8 @@
  #include "nv50/nv50_context.h"
  #include "nv_object.xml.h"

 +#include "nouveau_perfmon.h"
 +
  #define NV50_QUERY_STATE_READY   0
  #define NV50_QUERY_STATE_ACTIVE  1
  #define NV50_QUERY_STATE_ENDED   2
 @@ -51,10 +53,25 @@ struct nv50_query {
 boolean is64bit;
 struct nouveau_mm_allocation *mm;
 struct nouveau_fence *fence;
 +   struct nouveau_object *perfdom;
  };

  #define NV50_QUERY_ALLOC_SPACE 256

 +#ifdef DEBUG
 +static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args);
 +#endif
 +
 +static boolean
 +nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *);
 +static void
 +nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *);
 +static boolean
 +nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *);
 +static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *);
 +static boolean nv50_hw_pm_query_result(struct nv50_context *,
 +struct nv50_query *, boolean, void *);
 +
  static INLINE struct nv50_query *
  nv50_query(struct pipe_query *pipe)
  {
 @@ -96,12 +113,18 @@ nv50_query_allocate(struct nv50_context *nv50, struct 
 nv50_query *q, int size)
  static void
  nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
  {
 +   struct nv50_context *nv50 = nv50_context(pipe);
 +   struct nv50_query *q = nv50_query(pq);
 +
 if (!pq)
return;

 -   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
 -   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
 -   FREE(nv50_query(pq));
 +   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST))
 +  nv50_hw_pm_query_destroy(nv50, q);
 +
 +   nv50_query_allocate(nv50, q, 0);
 +   nouveau_fence_ref(NULL, &q->fence);
 +   FREE(q);
  }

  static struct pipe_query *
 @@ -130,6 +153,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned 
 type, unsigned index)
q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
 }

 +   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) 
 {
 +  if (!nv50_hw_pm_query_create(nv50, q))
 + return NULL;
 +   }
 +
 return (struct pipe_query *)q;
  }

 @@ -154,6 +182,7 @@ nv50_query_begin(struct pipe_context *pipe, struct 
 pipe_query *pq)
 struct nv50_context *nv50 = nv50_context(pipe);
 struct nouveau_pushbuf *push = nv50->base.pushbuf;
 struct nv50_query *q = nv50_query(pq);
 +   boolean ret = TRUE;

 if (!pq)
return FALSE;
 @@ -211,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct 
 pipe_query *pq)
nv50_query_get(push, q, 0x10, 0x5002);
break;
 default:
 +  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
 NV50_HW_PM_QUERY_LAST)) {
 + ret = nv50_hw_pm_query_begin(nv50, q);
 +  }
break;
 }
 q->state = NV50_QUERY_STATE_ACTIVE;
 -   return true;
 +   return ret;
  }

  static void
 @@ -274,7 +306,9 @@ nv50_query_end(struct pipe_context *pipe, struct 
 pipe_query *pq)
q->state = NV50_QUERY_STATE_READY;
break;
 default:
 -  assert(0);
 +  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
 NV50_HW_PM_QUERY_LAST)) {
 + nv50_hw_pm_query_end(nv50, q);
 +  }
break;
 }

 @@ -309,6 +343,10 @@ nv50_query_result(struct pipe_context *pipe, struct 
 pipe_query *pq,
 if (!pq)
return FALSE;

 +   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) 
 {
 +  return nv50_hw_pm_query_result(nv50, q, wait, result);
 +   }
 +
 if (q->state != NV50_QUERY_STATE_READY)
nv50_query_update(q);

 @@ -488,6 +526,1015 @@ nva0_so_target_save_offset(struct pipe_context *pipe,
 nv50_query_end(pipe, targ->pq);
  }

 +/* === HARDWARE GLOBAL PERFORMANCE COUNTERS for NV50 === */
 +
 +struct nv50_hw_pm_source_cfg
 +{
 +   const char *name;
 +   uint64_t value;
 +};
 +
 +struct nv50_hw_pm_signal_cfg
 +{
 +   const char *name;
 +   const struct nv50_hw_pm_source_cfg src[8];
 +};
 +
 +struct nv50_hw_pm_counter_cfg
 +{
 +   uint16_t logic_op;
 +   

[Nouveau] [RFC PATCH 6/8] nv50: add support for compute/graphics global performance counters

2015-06-22 Thread Samuel Pitoiset
This commit adds support for both compute and graphics global
performance counters which have been reverse engineered with
CUPTI (Linux) and PerfKit (Windows).

Currently, only one query type can be monitored at a time because
Gallium's HUD is not well suited to this. This will be improved later.

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1057 +++-
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |   35 +
 2 files changed, 1087 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 1162110..b9d2914 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
 #include "nv50/nv50_context.h"
 #include "nv_object.xml.h"
 
+#include "nouveau_perfmon.h"
+
 #define NV50_QUERY_STATE_READY   0
 #define NV50_QUERY_STATE_ACTIVE  1
 #define NV50_QUERY_STATE_ENDED   2
@@ -51,10 +53,25 @@ struct nv50_query {
boolean is64bit;
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
+   struct nouveau_object *perfdom;
 };
 
 #define NV50_QUERY_ALLOC_SPACE 256
 
+#ifdef DEBUG
+static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args);
+#endif
+
+static boolean
+nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *);
+static void
+nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *);
+static boolean
+nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *);
+static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *);
+static boolean nv50_hw_pm_query_result(struct nv50_context *,
+struct nv50_query *, boolean, void *);
+
 static INLINE struct nv50_query *
 nv50_query(struct pipe_query *pipe)
 {
@@ -96,12 +113,18 @@ nv50_query_allocate(struct nv50_context *nv50, struct 
nv50_query *q, int size)
 static void
 nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
 {
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_query *q = nv50_query(pq);
+
if (!pq)
   return;
 
-   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
-   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
-   FREE(nv50_query(pq));
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST))
+  nv50_hw_pm_query_destroy(nv50, q);
+
+   nv50_query_allocate(nv50, q, 0);
+   nouveau_fence_ref(NULL, &q->fence);
+   FREE(q);
 }
 
 static struct pipe_query *
@@ -130,6 +153,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned 
type, unsigned index)
   q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
}
 
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  if (!nv50_hw_pm_query_create(nv50, q))
+ return NULL;
+   }
+
return (struct pipe_query *)q;
 }
 
@@ -154,6 +182,7 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
+   boolean ret = TRUE;
 
if (!pq)
   return FALSE;
@@ -211,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
   nv50_query_get(push, q, 0x10, 0x5002);
   break;
default:
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ ret = nv50_hw_pm_query_begin(nv50, q);
+  }
   break;
}
q->state = NV50_QUERY_STATE_ACTIVE;
-   return true;
+   return ret;
 }
 
 static void
@@ -274,7 +306,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query 
*pq)
   q->state = NV50_QUERY_STATE_READY;
   break;
default:
-  assert(0);
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ nv50_hw_pm_query_end(nv50, q);
+  }
   break;
}
 
@@ -309,6 +343,10 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
if (!pq)
   return FALSE;
 
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  return nv50_hw_pm_query_result(nv50, q, wait, result);
+   }
+
if (q->state != NV50_QUERY_STATE_READY)
   nv50_query_update(q);
 
@@ -488,6 +526,1015 @@ nva0_so_target_save_offset(struct pipe_context *pipe,
nv50_query_end(pipe, targ->pq);
 }
 
+/* === HARDWARE GLOBAL PERFORMANCE COUNTERS for NV50 === */
+
+struct nv50_hw_pm_source_cfg
+{
+   const char *name;
+   uint64_t value;
+};
+
+struct nv50_hw_pm_signal_cfg
+{
+   const char *name;
+   const struct nv50_hw_pm_source_cfg src[8];
+};
+
+struct nv50_hw_pm_counter_cfg
+{
+   uint16_t logic_op;
+   const struct nv50_hw_pm_signal_cfg sig[4];
+};
+
+enum nv50_hw_pm_query_display
+{
+   NV50_HW_PM_EVENT_DISPLAY_RAW,
+   NV50_HW_PM_EVENT_DISPLAY_RATIO,
+};
+
+enum nv50_hw_pm_query_count
+{
+   NV50_HW_PM_EVENT_COUNT_SIMPLE,
+   NV50_HW_PM_EVENT_COUNT_B4,