On 26/10/15 20:13, Karol Herbst wrote:
From: Karol Herbst <[email protected]>
---
drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h | 788 +++++++++++++++------------
drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h | 740 ++++++++++++++-----------
drm/nouveau/nvkm/subdev/pmu/fuc/gk104.fuc4.h | 740 ++++++++++++++-----------
drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h | 710 ++++++++++++++----------
drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h | 755 ++++++++++++++-----------
drm/nouveau/nvkm/subdev/pmu/fuc/os.h | 4 +
drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc | 148 +++++
7 files changed, 2267 insertions(+), 1618 deletions(-)
diff --git a/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
b/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
index c8b06cb..53508d9 100644
--- a/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
+++ b/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
@@ -49,4 +49,8 @@
#define I2C__MSG_DATA0_WR08_REG 0:7
#define I2C__MSG_DATA1_WR08_VAL 0:7
+
+/* PERF: message identifiers */
+#define PERF_MSG_LOAD 1
Could you document the expected inputs and outputs of this message as a
comment?
// IN: don't care
// OUT:
// - byte 3: PCIe load (GF100+)
// - byte 2: Memory load
// - byte 1: Video decoding engines' load
// - byte 0: GR load
+
#endif
diff --git a/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
b/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
index 38eadf7..69a8f8d 100644
--- a/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
+++ b/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
@@ -30,6 +30,18 @@ process(PROC_PERF, #perf_init, #perf_recv)
* PERF data segment
*****************************************************************************/
#ifdef INCLUDE_DATA
+perf_attr_start:
+// parameters
+perf_polling_period_us: .b32 100000
+
+// engine usage percentage
It is a tiny bit misleading to say percentage when you mean per-255. How
about:
// engine usage (0-255)
+perf_eng_gr: .b8 0
+perf_eng_vdec: .b8 0
+perf_eng_mc: .b8 0
+#if NVKM_PPWR_CHIPSET >= GF100
+perf_eng_pcie: .b8 0
+#endif
+.align 4
#endif
/******************************************************************************
@@ -46,6 +58,78 @@ process(PROC_PERF, #perf_init, #perf_recv)
// $r11 - data1
// $r0 - zero
perf_recv:
+ push $r1
+
+ imm32($r10, PROC_HOST)
+ cmp b32 $r14 $r10
+ bra ne #perf_recv_not_host
+ cmp b32 $r13 PERF_MSG_LOAD
Thanks for doing this and not assuming straight away that this is going
to be the only type of message you will receive from the host :)
+ bra e #perf_load
+ bra #perf_recv_exit
+
+perf_load:
+ clear b32 $r11
+ clear b32 $r12
+#if NVKM_PPWR_CHIPSET >= GF100
+ ld(b8, $r12, #perf_eng_pcie)
+ shl b32 $r12 8
+#endif
+ ld(b8, $r12, #perf_eng_mc)
+ shl b32 $r12 8
+ ld(b8, $r12, #perf_eng_vdec)
+ shl b32 $r12 8
+ ld(b8, $r12, #perf_eng_gr)
Simple and effective, that's nice :)
+ call(send)
+ bra #perf_recv_exit
+
+perf_recv_not_host:
+ call(perf_counter_readout)
+
+ ld(b32, $r14, #perf_polling_period_us)
+ call #ticks_from_us
+ call(timer)
+
+perf_recv_exit:
+ pop $r1
+ ret
+
+
+// description
+//
+// $r15 - current (perf)
+// $r0 - zero
+perf_counter_readout:
+ nv_iord($r14, NV_PPWR_COUNTER_COUNT(0))
+ div $r14 $r14 0xff
I'm not a big fan of the loss of precision caused by dividing the total
tick count by 0xff up front. Why do you do that instead of what I did here:
https://cgit.freedesktop.org/~mperes/nouveau/tree/nvkm/subdev/pwr/fuc/perf.fuc?h=ppwr_rework&id=27310fa8fdc39e54a3f4383fada96a3562c5a022#n134
With my solution, you get no loss in precision. You also poll the
counters faster, which means greater accuracy, and I miss fewer cycles.
All in all, could you please reuse my polling function? :D
+
+ nv_iord($r13, NV_PPWR_COUNTER_COUNT(1))
+ div $r13 $r13 $r14
+ st(b8, #perf_eng_gr, $r13)
+
+ nv_iord($r13, NV_PPWR_COUNTER_COUNT(2))
+ div $r13 $r13 $r14
+ st(b8, #perf_eng_vdec, $r13)
+
+ nv_iord($r13, NV_PPWR_COUNTER_COUNT(3))
+ div $r13 $r13 $r14
+ st(b8, #perf_eng_mc, $r13)
+
+#if NVKM_PPWR_CHIPSET >= GF100
+ nv_iord($r13, NV_PPWR_COUNTER_COUNT(4))
+ div $r13 $r13 $r14
+ st(b8, #perf_eng_pcie, $r13)
+#endif
+
+ // reset the counters
+ imm32($r14, NV_PPWR_COUNTER_COUNT_RESET)
+ nv_iowr(NV_PPWR_COUNTER_COUNT(0), $r14)
+ nv_iowr(NV_PPWR_COUNTER_COUNT(1), $r14)
+ nv_iowr(NV_PPWR_COUNTER_COUNT(2), $r14)
+ nv_iowr(NV_PPWR_COUNTER_COUNT(3), $r14)
+#if NVKM_PPWR_CHIPSET >= GF100
+ nv_iowr(NV_PPWR_COUNTER_COUNT(4), $r14)
+#endif
+
ret
// description
@@ -53,5 +137,69 @@ perf_recv:
// $r15 - current (perf)
// $r0 - zero
perf_init:
+ // set up the total ticks counter first
+ imm32($r14, NV_PPWR_COUNTER_MODE_ALWAYS)
+ nv_iowr(NV_PPWR_COUNTER_MODE(0), $r14)
+
+ // set up the other counters, with fermi there are more
+ imm32($r14, NV_PPWR_COUNTER_MODE_IF_NOT_ALL)
+ nv_iowr(NV_PPWR_COUNTER_MODE(1), $r14)
+ nv_iowr(NV_PPWR_COUNTER_MODE(2), $r14)
+ nv_iowr(NV_PPWR_COUNTER_MODE(3), $r14)
+#if NVKM_PPWR_CHIPSET >= GF100
+ nv_iowr(NV_PPWR_COUNTER_MODE(4), $r14)
+#endif
+
+ // core load counter
+ imm32($r14,
+ NV_PPWR_COUNTER_SIG_GR_IDLE
+ | NV_PPWR_COUNTER_SIG_GR_GPC_IDLE
+ | NV_PPWR_COUNTER_SIG_GR_ROP_IDLE
+#if NVKM_PPWR_CHIPSET >= GF100
+ | NV_PPWR_COUNTER_SIG_GR_HUB_IDLE
+ | NV_PPWR_COUNTER_SIG_PCOPY0_IDLE
+ | NV_PPWR_COUNTER_SIG_PCOPY1_IDLE
+#if NVKM_PPWR_CHIPSET >= GK104
+ | NV_PPWR_COUNTER_SIG_PCOPY2_IDLE
+#endif
+#endif
+ )
+ nv_iowr(NV_PPWR_COUNTER_MASK(1), $r14)
+
+ // video load counter
+ imm32($r14,
+ NV_PPWR_COUNTER_SIG_PVLD_IDLE
+ | NV_PPWR_COUNTER_SIG_PPDEC_IDLE
+ | NV_PPWR_COUNTER_SIG_PPPP_IDLE
+#if NVKM_PPWR_CHIPSET >= GK104
+ | NV_PPWR_COUNTER_SIG_PVENC
+#endif
+ )
+ nv_iowr(NV_PPWR_COUNTER_MASK(2), $r14)
+
+ // memory load counter
+ imm32($r14,
+#if NVKM_PPWR_CHIPSET >= GF100
+ NV_PPWR_COUNTER_SIG_BFB_PART0_REQ
+#else
+ NV_PPWR_COUNTER_SIG_FB_PART0_REQ
+#endif
+ )
+ nv_iowr(NV_PPWR_COUNTER_MASK(3), $r14)
+
+ // pcie load counter
+#if NVKM_PPWR_CHIPSET >= GF100
+ imm32($r14, NV_PPWR_COUNTER_SIG_PCIE)
+ nv_iowr(NV_PPWR_COUNTER_MASK(4), $r14)
+#endif
+
+ // initial read out
+ call(perf_counter_readout)
+
+ // schedule the next read out
+ ld(b32, $r14, #perf_polling_period_us)
+ call #ticks_from_us
+ call(timer)
+
ret
#endif
Looks mostly good, except for the polling function!
_______________________________________________
Nouveau mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/nouveau