[PATCH] drm/i915/hwmon: Get rid of devm
When both hwmon and hwmon drvdata (on which hwmon depends) are device managed resources, the expectation, on device unbind, is that hwmon will be released before drvdata. However, in i915 there are two separate code paths, which both release either drvdata or hwmon and either can be released before the other. These code paths (for device unbind) are as follows (see also the bug referenced below): Call Trace: release_nodes+0x11/0x70 devres_release_group+0xb2/0x110 component_unbind_all+0x8d/0xa0 component_del+0xa5/0x140 intel_pxp_tee_component_fini+0x29/0x40 [i915] intel_pxp_fini+0x33/0x80 [i915] i915_driver_remove+0x4c/0x120 [i915] i915_pci_remove+0x19/0x30 [i915] pci_device_remove+0x32/0xa0 device_release_driver_internal+0x19c/0x200 unbind_store+0x9c/0xb0 and Call Trace: release_nodes+0x11/0x70 devres_release_all+0x8a/0xc0 device_unbind_cleanup+0x9/0x70 device_release_driver_internal+0x1c1/0x200 unbind_store+0x9c/0xb0 This means that in i915, if use devm, we cannot gurantee that hwmon will always be released before drvdata. Which means that we have a uaf if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't. The only way out of this seems to be do get rid of devm_ and release/free everything explicitly during device unbind. v2: Change commit message and other minor code changes v3: Cleanup from i915_hwmon_register on error (Armin Wolf) v4: Eliminate potential static analyzer warning (Rodrigo) Eliminate fetch_and_zero (Jani) v5: Restore previous logic for ddat_gt->hwmon_dev error return (Andi) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366 Reviewed-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 46 +-- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index b758fd110c20..c0662a022f59 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!IS_DGFX(i915)) return; - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return; @@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915) hwm_get_preregistration_info(i915); /* hwmon_dev points to device hwmon */ - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, -ddat, -_chip_info, -hwm_groups); - if (IS_ERR(hwmon_dev)) { - i915->hwmon = NULL; - return; - } + hwmon_dev = hwmon_device_register_with_info(dev, ddat->name, + ddat, + _chip_info, + hwm_groups); + if (IS_ERR(hwmon_dev)) + goto err; ddat->hwmon_dev = hwmon_dev; @@ -839,16 +837,36 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0)) continue; - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name, -ddat_gt, - _gt_chip_info, -NULL); + hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name, + ddat_gt, + _gt_chip_info, + NULL); if (!IS_ERR(hwmon_dev)) ddat_gt->hwmon_dev = hwmon_dev; } + return; +err: + i915_hwmon_unregister(i915); } void i915_hwmon_unregister(struct drm_i915_private *i915) { - fetch_and_zero(>hwmon); + struct i915_hwmon *hwmon = i915->hwmon; + struct intel_gt *gt; + int i; + + if (!hwmon) + return; + + for_each_gt(gt, i915, i) + if (hwmon->ddat_gt[i].hwmon_dev) + hwmon_device_unregister(hwmon->ddat_gt[i].hwmon_dev); + + if (hwmon->ddat.hwmon_dev) + hwmon_device_unregister(hwmon->ddat.hwmon_dev); + + mutex_destroy(>hwmon_lock); + + kfree(i915->hwmon); + i915->hwmon = NULL; } -- 2.41.0
[PATCH v4] drm/i915/hwmon: Get rid of devm
When both hwmon and hwmon drvdata (on which hwmon depends) are device managed resources, the expectation, on device unbind, is that hwmon will be released before drvdata. However, in i915 there are two separate code paths, which both release either drvdata or hwmon and either can be released before the other. These code paths (for device unbind) are as follows (see also the bug referenced below): Call Trace: release_nodes+0x11/0x70 devres_release_group+0xb2/0x110 component_unbind_all+0x8d/0xa0 component_del+0xa5/0x140 intel_pxp_tee_component_fini+0x29/0x40 [i915] intel_pxp_fini+0x33/0x80 [i915] i915_driver_remove+0x4c/0x120 [i915] i915_pci_remove+0x19/0x30 [i915] pci_device_remove+0x32/0xa0 device_release_driver_internal+0x19c/0x200 unbind_store+0x9c/0xb0 and Call Trace: release_nodes+0x11/0x70 devres_release_all+0x8a/0xc0 device_unbind_cleanup+0x9/0x70 device_release_driver_internal+0x1c1/0x200 unbind_store+0x9c/0xb0 This means that in i915, if use devm, we cannot gurantee that hwmon will always be released before drvdata. Which means that we have a uaf if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't. The only way out of this seems to be do get rid of devm_ and release/free everything explicitly during device unbind. v2: Change commit message and other minor code changes v3: Cleanup from i915_hwmon_register on error (Armin Wolf) v4: Eliminate potential static analyzer warning (Rodrigo) Eliminate fetch_and_zero (Jani) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366 Reviewed-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 52 +-- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index b758fd110c20..1551a40a675e 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!IS_DGFX(i915)) return; - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return; @@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915) hwm_get_preregistration_info(i915); /* hwmon_dev points to device hwmon */ - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, -ddat, -_chip_info, -hwm_groups); - if (IS_ERR(hwmon_dev)) { - i915->hwmon = NULL; - return; - } + hwmon_dev = hwmon_device_register_with_info(dev, ddat->name, + ddat, + _chip_info, + hwm_groups); + if (IS_ERR(hwmon_dev)) + goto err; ddat->hwmon_dev = hwmon_dev; @@ -839,16 +837,38 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0)) continue; - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name, -ddat_gt, - _gt_chip_info, -NULL); - if (!IS_ERR(hwmon_dev)) - ddat_gt->hwmon_dev = hwmon_dev; + hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name, + ddat_gt, + _gt_chip_info, + NULL); + if (IS_ERR(hwmon_dev)) + goto err; + + ddat_gt->hwmon_dev = hwmon_dev; } + return; +err: + i915_hwmon_unregister(i915); } void i915_hwmon_unregister(struct drm_i915_private *i915) { - fetch_and_zero(>hwmon); + struct i915_hwmon *hwmon = i915->hwmon; + struct intel_gt *gt; + int i; + + if (!hwmon) + return; + + for_each_gt(gt, i915, i) + if (hwmon->ddat_gt[i].hwmon_dev) + hwmon_device_unregister(hwmon->ddat_gt[i].hwmon_dev); + + if (hwmon->ddat.hwmon_dev) + hwmon_device_unregister(hwmon->ddat.hwmon_dev); + + mutex_destroy(>hwmon_lock); + + kfree(i915->hwmon); + i915->hwmon = NULL; } -- 2.41.0
[PATCH v3] drm/i915/hwmon: Get rid of devm
When both hwmon and hwmon drvdata (on which hwmon depends) are device managed resources, the expectation, on device unbind, is that hwmon will be released before drvdata. However, in i915 there are two separate code paths, which both release either drvdata or hwmon and either can be released before the other. These code paths (for device unbind) are as follows (see also the bug referenced below): Call Trace: release_nodes+0x11/0x70 devres_release_group+0xb2/0x110 component_unbind_all+0x8d/0xa0 component_del+0xa5/0x140 intel_pxp_tee_component_fini+0x29/0x40 [i915] intel_pxp_fini+0x33/0x80 [i915] i915_driver_remove+0x4c/0x120 [i915] i915_pci_remove+0x19/0x30 [i915] pci_device_remove+0x32/0xa0 device_release_driver_internal+0x19c/0x200 unbind_store+0x9c/0xb0 and Call Trace: release_nodes+0x11/0x70 devres_release_all+0x8a/0xc0 device_unbind_cleanup+0x9/0x70 device_release_driver_internal+0x1c1/0x200 unbind_store+0x9c/0xb0 This means that in i915, if use devm, we cannot gurantee that hwmon will always be released before drvdata. Which means that we have a uaf if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't. The only way out of this seems to be do get rid of devm_ and release/free everything explicitly during device unbind. v2: Change commit message and other minor code changes v3: Cleanup from i915_hwmon_register on error (Armin Wolf) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 54 ++- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index b758fd110c20..8cebf6f5b101 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!IS_DGFX(i915)) return; - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return; @@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915) hwm_get_preregistration_info(i915); /* hwmon_dev points to device hwmon */ - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, -ddat, -_chip_info, -hwm_groups); - if (IS_ERR(hwmon_dev)) { - i915->hwmon = NULL; - return; - } + hwmon_dev = hwmon_device_register_with_info(dev, ddat->name, + ddat, + _chip_info, + hwm_groups); + if (IS_ERR(hwmon_dev)) + goto err; ddat->hwmon_dev = hwmon_dev; @@ -839,16 +837,40 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0)) continue; - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name, -ddat_gt, - _gt_chip_info, -NULL); - if (!IS_ERR(hwmon_dev)) - ddat_gt->hwmon_dev = hwmon_dev; + hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name, + ddat_gt, + _gt_chip_info, + NULL); + if (IS_ERR(hwmon_dev)) + goto err; + + ddat_gt->hwmon_dev = hwmon_dev; } + return; +err: + i915_hwmon_unregister(i915); } void i915_hwmon_unregister(struct drm_i915_private *i915) { - fetch_and_zero(>hwmon); + struct i915_hwmon *hwmon = fetch_and_zero(>hwmon); + struct hwm_drvdata *ddat = >ddat; + struct intel_gt *gt; + int i; + + if (!hwmon) + return; + + for_each_gt(gt, i915, i) { + struct hwm_drvdata *ddat_gt = hwmon->ddat_gt + i; + + if (ddat_gt->hwmon_dev) + hwmon_device_unregister(ddat_gt->hwmon_dev); + } + + if (ddat->hwmon_dev) + hwmon_device_unregister(ddat->hwmon_dev); + + mutex_destroy(>hwmon_lock); + kfree(hwmon); } -- 2.41.0
[PATCH] drm/i915/hwmon: Get rid of devm
When both hwmon and hwmon drvdata (on which hwmon depends) are device managed resources, the expectation, on device unbind, is that hwmon will be released before drvdata. However, in i915 there are two separate code paths, which both release either drvdata or hwmon and either can be released before the other. These code paths (for device unbind) are as follows (see also the bug referenced below): Call Trace: release_nodes+0x11/0x70 devres_release_group+0xb2/0x110 component_unbind_all+0x8d/0xa0 component_del+0xa5/0x140 intel_pxp_tee_component_fini+0x29/0x40 [i915] intel_pxp_fini+0x33/0x80 [i915] i915_driver_remove+0x4c/0x120 [i915] i915_pci_remove+0x19/0x30 [i915] pci_device_remove+0x32/0xa0 device_release_driver_internal+0x19c/0x200 unbind_store+0x9c/0xb0 and Call Trace: release_nodes+0x11/0x70 devres_release_all+0x8a/0xc0 device_unbind_cleanup+0x9/0x70 device_release_driver_internal+0x1c1/0x200 unbind_store+0x9c/0xb0 This means that in i915, if use devm, we cannot gurantee that hwmon will always be released before drvdata. Which means that we have a uaf if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't. The only way out of this seems to be do get rid of devm_ and release/free everything explicitly during device unbind. v2: Change commit message and other minor code changes v3: Cleanup from i915_hwmon_register on error (Armin Wolf) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 54 ++- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index b758fd110c20..8cebf6f5b101 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!IS_DGFX(i915)) return; - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return; @@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915) hwm_get_preregistration_info(i915); /* hwmon_dev points to device hwmon */ - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, -ddat, -_chip_info, -hwm_groups); - if (IS_ERR(hwmon_dev)) { - i915->hwmon = NULL; - return; - } + hwmon_dev = hwmon_device_register_with_info(dev, ddat->name, + ddat, + _chip_info, + hwm_groups); + if (IS_ERR(hwmon_dev)) + goto err; ddat->hwmon_dev = hwmon_dev; @@ -839,16 +837,40 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0)) continue; - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name, -ddat_gt, - _gt_chip_info, -NULL); - if (!IS_ERR(hwmon_dev)) - ddat_gt->hwmon_dev = hwmon_dev; + hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name, + ddat_gt, + _gt_chip_info, + NULL); + if (IS_ERR(hwmon_dev)) + goto err; + + ddat_gt->hwmon_dev = hwmon_dev; } + return; +err: + i915_hwmon_unregister(i915); } void i915_hwmon_unregister(struct drm_i915_private *i915) { - fetch_and_zero(>hwmon); + struct i915_hwmon *hwmon = fetch_and_zero(>hwmon); + struct hwm_drvdata *ddat = >ddat; + struct intel_gt *gt; + int i; + + if (!hwmon) + return; + + for_each_gt(gt, i915, i) { + struct hwm_drvdata *ddat_gt = hwmon->ddat_gt + i; + + if (ddat_gt->hwmon_dev) + hwmon_device_unregister(ddat_gt->hwmon_dev); + } + + if (ddat->hwmon_dev) + hwmon_device_unregister(ddat->hwmon_dev); + + mutex_destroy(>hwmon_lock); + kfree(hwmon); } -- 2.41.0
[PATCH v2] drm/i915/hwmon: Get rid of devm
When both hwmon and hwmon drvdata (on which hwmon depends) are device managed resources, the expectation, on device unbind, is that hwmon will be released before drvdata. However, in i915 there are two separate code paths, which both release either drvdata or hwmon and either can be released before the other. These code paths (for device unbind) are as follows (see also the bug referenced below): Call Trace: release_nodes+0x11/0x70 devres_release_group+0xb2/0x110 component_unbind_all+0x8d/0xa0 component_del+0xa5/0x140 intel_pxp_tee_component_fini+0x29/0x40 [i915] intel_pxp_fini+0x33/0x80 [i915] i915_driver_remove+0x4c/0x120 [i915] i915_pci_remove+0x19/0x30 [i915] pci_device_remove+0x32/0xa0 device_release_driver_internal+0x19c/0x200 unbind_store+0x9c/0xb0 and Call Trace: release_nodes+0x11/0x70 devres_release_all+0x8a/0xc0 device_unbind_cleanup+0x9/0x70 device_release_driver_internal+0x1c1/0x200 unbind_store+0x9c/0xb0 This means that in i915, if use devm, we cannot gurantee that hwmon will always be released before drvdata. Which means that we have a uaf if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't. The only way out of this seems to be do get rid of devm_ and release/free everything explicitly during device unbind. v2: Change commit message and other minor code changes Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 41 +++ 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 8c3f443c8347..46c24b1ee6df 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -792,7 +792,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!IS_DGFX(i915)) return; - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); + hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return; @@ -818,10 +818,10 @@ void i915_hwmon_register(struct drm_i915_private *i915) hwm_get_preregistration_info(i915); /* hwmon_dev points to device hwmon */ - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name, -ddat, -_chip_info, -hwm_groups); + hwmon_dev = hwmon_device_register_with_info(dev, ddat->name, + ddat, + _chip_info, + hwm_groups); if (IS_ERR(hwmon_dev)) { i915->hwmon = NULL; return; @@ -838,10 +838,10 @@ void i915_hwmon_register(struct drm_i915_private *i915) if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, hwmon_energy_input, 0)) continue; - hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat_gt->name, -ddat_gt, - _gt_chip_info, -NULL); + hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name, + ddat_gt, + _gt_chip_info, + NULL); if (!IS_ERR(hwmon_dev)) ddat_gt->hwmon_dev = hwmon_dev; } @@ -849,5 +849,26 @@ void i915_hwmon_register(struct drm_i915_private *i915) void i915_hwmon_unregister(struct drm_i915_private *i915) { - fetch_and_zero(>hwmon); + struct i915_hwmon *hwmon = fetch_and_zero(>hwmon); + struct hwm_drvdata *ddat = >ddat; + struct intel_gt *gt; + int i; + + if (!hwmon) + return; + + for_each_gt(gt, i915, i) { + struct hwm_drvdata *ddat_gt = hwmon->ddat_gt + i; + + if (ddat_gt->hwmon_dev) { + hwmon_device_unregister(ddat_gt->hwmon_dev); + ddat_gt->hwmon_dev = NULL; + } + } + + if (ddat->hwmon_dev) + hwmon_device_unregister(ddat->hwmon_dev); + + mutex_destroy(>hwmon_lock); + kfree(hwmon); } -- 2.41.0
[PATCH] drm/i915/perf: Remove gtt_offset from stream->oa_buffer.head/.tail
There is no reason to add gtt_offset to the cached head/tail pointers stream->oa_buffer.head and stream->oa_buffer.tail. This causes the code to constantly add gtt_offset and subtract gtt_offset and is error prone. It is much simpler to maintain stream->oa_buffer.head and stream->oa_buffer.tail without adding gtt_offset to them and just allow for the gtt_offset when reading/writing from/to HW registers. v2: Minor tweak to commit message due to dropping patch in previous series Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa --- drivers/gpu/drm/i915/i915_perf.c | 52 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 018f42fff4cc0..1347e4ec9dd5a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -543,10 +543,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); int report_size = stream->oa_buffer.format->size; - u32 head, tail, read_tail; + u32 tail, hw_tail; unsigned long flags; bool pollin; - u32 hw_tail; u32 partial_report_size; /* We have to consider the (unlikely) possibility that read() errors @@ -556,6 +555,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) spin_lock_irqsave(>oa_buffer.ptr_lock, flags); hw_tail = stream->perf->ops.oa_hw_tail_read(stream); + hw_tail -= gtt_offset; /* The tail pointer increases in 64 byte increments, not in report_size * steps. Also the report size may not be a power of 2. Compute @@ -567,13 +567,6 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) /* Subtract partial amount off the tail */ hw_tail = OA_TAKEN(hw_tail, partial_report_size); - /* NB: The head we observe here might effectively be a little -* out of date. If a read() is in progress, the head could be -* anywhere between this head and stream->oa_buffer.tail. -*/ - head = stream->oa_buffer.head - gtt_offset; - read_tail = stream->oa_buffer.tail - gtt_offset; - tail = hw_tail; /* Walk the stream backward until we find a report with report @@ -587,7 +580,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) * memory in the order they were written to. * If not : (╯°□°)╯︵ ┻━┻ */ - while (OA_TAKEN(tail, read_tail) >= report_size) { + while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { void *report = stream->oa_buffer.vaddr + tail; if (oa_report_id(stream, report) || @@ -601,9 +594,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) __ratelimit(>perf->tail_pointer_race)) drm_notice(>uncore->i915->drm, "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", -head, tail, hw_tail); +stream->oa_buffer.head, tail, hw_tail); - stream->oa_buffer.tail = gtt_offset + tail; + stream->oa_buffer.tail = tail; pollin = OA_TAKEN(stream->oa_buffer.tail, stream->oa_buffer.head) >= report_size; @@ -753,13 +746,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, spin_unlock_irqrestore(>oa_buffer.ptr_lock, flags); - /* -* NB: oa_buffer.head/tail include the gtt_offset which we don't want -* while indexing relative to oa_buf_base. -*/ - head -= gtt_offset; - tail -= gtt_offset; - /* * An out of bounds or misaligned head or tail pointer implies a driver * bug since we validate + align the tail pointers we read from the @@ -895,9 +881,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * We removed the gtt_offset for the copy loop above, indexing * relative to oa_buf_base so put back here... */ - head += gtt_offset; intel_uncore_write(uncore, oaheadptr, - head & GEN12_OAG_OAHEADPTR_MASK); + (head + gtt_offset) & GEN12_OAG_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(>oa_buffer.ptr_lock, flags); @@ -1042,12 +1027,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, spin_unlock_irqrestore(>oa_buffer.ptr_lock, flags); - /* NB: oa_buffer.head/tail include the gtt_offset which we don't want -* while indexing relative to oa_buf_base. -*/ - head -= gtt_offset; - tail -= gtt_offset;
[PATCH] dim: Disallow remote branch deletions with 'dim push'
An inadvertent 'dim push -d' can delete remote branches. Disallow such remote branch deletions. Signed-off-by: Ashutosh Dixit --- dim | 6 ++ 1 file changed, 6 insertions(+) diff --git a/dim b/dim index 126568e..e5899e6 100755 --- a/dim +++ b/dim @@ -1029,6 +1029,12 @@ function dim_push_branch fi fi + # Disallow remote branch deletions, say with 'dim push -d' + if [[ "$@" == *"-d"* ]]; then + echoerr "Attempt to delete remote branch, aborting." + return 1 + fi + git_push $remote $branch "$@" update_linux_next $branch drm-intel-next drm-intel-next-fixes drm-intel-fixes -- 2.38.0
[PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
No functional changes but we can remove some unsightly index computation and read/write functions if we convert the PMU sample array from a one-dimensional to a two-dimensional array. v2: Retain read/store helpers (Tvrtko) Suggested-by: Tvrtko Ursulin Reviewed-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_pmu.c | 16 +++- drivers/gpu/drm/i915/i915_pmu.h | 2 +- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 09313cf9316b4..f96fe92dca4e4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -191,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt) return ktime_to_ns(ktime_sub(ktime_get_raw(), kt)); } -static unsigned int -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample) -{ - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample; - - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample)); - - return idx; -} - static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample) { - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur; + return pmu->sample[gt_id][sample].cur; } static void store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val) { - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val; + pmu->sample[gt_id][sample].cur = val; } static void add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul) { - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul); + pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul); } static u64 get_rc6(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 33d80fbaab8bc..d20592e7db999 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -127,7 +127,7 @@ struct i915_pmu { * Only global counters are held here, while the per-engine ones are in * struct intel_engine_cs. */ - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS]; + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS]; /** * @sleep_last: Last time GT parked for RC6 estimation. */ -- 2.38.0
[PATCH 1/2] drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked
pmu_needs_timer() keeps the timer running even when GT is parked, ostensibly to sample requested/actual frequencies. However frequency_sample() has the following: /* Report 0/0 (actual/requested) frequency while parked. */ if (!intel_gt_pm_get_if_awake(gt)) return; The above code prevents frequencies to be sampled while the GT is parked. So we might as well turn off the sampling timer itself in this case and save CPU cycles/power. v2: Instead of turning freq bits off, return false, since no counters will run after this change when GT is parked (Tvrtko) v3: Remove gpu_active argument of pmu_needs_timer (Andrzej) Signed-off-by: Ashutosh Dixit Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_pmu.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index a814583e19fd7..09313cf9316b4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -139,7 +139,7 @@ static u32 frequency_enabled_mask(void) return mask; } -static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) +static bool pmu_needs_timer(struct i915_pmu *pmu) { struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); u32 enable; @@ -157,17 +157,11 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) */ enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK; - /* -* When the GPU is idle per-engine counters do not need to be -* running so clear those bits out. -*/ - if (!gpu_active) - enable &= ~ENGINE_SAMPLE_MASK; /* * Also there is software busyness tracking available we do not * need the timer for I915_SAMPLE_BUSY counter. */ - else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) + if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) enable &= ~BIT(I915_SAMPLE_BUSY); /* @@ -295,7 +289,7 @@ static void park_rc6(struct intel_gt *gt) static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) { - if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { + if (!pmu->timer_enabled && pmu_needs_timer(pmu)) { pmu->timer_enabled = true; pmu->timer_last = ktime_get(); hrtimer_start_range_ns(>timer, @@ -321,7 +315,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt) */ pmu->unparked &= ~BIT(gt->info.id); if (pmu->unparked == 0) - pmu->timer_enabled = pmu_needs_timer(pmu, false); + pmu->timer_enabled = false; spin_unlock_irq(>lock); } @@ -827,7 +821,7 @@ static void i915_pmu_disable(struct perf_event *event) */ if (--pmu->enable_count[bit] == 0) { pmu->enable &= ~BIT(bit); - pmu->timer_enabled &= pmu_needs_timer(pmu, true); + pmu->timer_enabled &= pmu_needs_timer(pmu); } spin_unlock_irqrestore(>lock, flags); -- 2.38.0
[PATCH v2 0/2] drm/i915/pmu: couple of cleanups
Cc: Andrzej Hajda Cc: Tvrtko Ursulin Signed-off-by: Ashutosh Dixit Ashutosh Dixit (2): drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked drm/i915/pmu: Make PMU sample array two-dimensional drivers/gpu/drm/i915/i915_pmu.c | 32 drivers/gpu/drm/i915/i915_pmu.h | 2 +- 2 files changed, 9 insertions(+), 25 deletions(-) -- 2.38.0
[PATCH] drm/i915/perf: Clear out entire reports after reading if not power of 2 size
Clearing out report id and timestamp as means to detect unlanded reports only works if report size is power of 2. That is, only when report size is a sub-multiple of the OA buffer size can we be certain that reports will land at the same place each time in the OA buffer (after rewind). If report size is not a power of 2, we need to zero out the entire report to be able to detect unlanded reports reliably. v2: Add Fixes tag (Umesh) Fixes: 1cc064dce4ed ("drm/i915/perf: Add support for OA media units") Reviewed-by: Umesh Nerlige Ramappa Reviewed-by: Lionel Landwerlin Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_perf.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 19d5652300eeb..58284156428dc 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, stream->oa_buffer.last_ctx_id = ctx_id; } - /* -* Clear out the report id and timestamp as a means to detect unlanded -* reports. -*/ - oa_report_id_clear(stream, report32); - oa_timestamp_clear(stream, report32); + if (is_power_of_2(report_size)) { + /* +* Clear out the report id and timestamp as a means +* to detect unlanded reports. +*/ + oa_report_id_clear(stream, report32); + oa_timestamp_clear(stream, report32); + } else { + /* Zero out the entire report */ + memset(report32, 0, report_size); + } } if (start_offset != *offset) { -- 2.38.0
[PATCH 1/2] drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked
pmu_needs_timer() keeps the timer running even when GT is parked, ostensibly to sample requested/actual frequencies. However frequency_sample() has the following: /* Report 0/0 (actual/requested) frequency while parked. */ if (!intel_gt_pm_get_if_awake(gt)) return; The above code prevents frequencies to be sampled while the GT is parked. So we might as well turn off the sampling timer itself in this case and save CPU cycles/power. v2: Instead of turning freq bits off, return false, since no counters will run after this change when GT is parked (Tvrtko) Signed-off-by: Ashutosh Dixit Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_pmu.c | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index a814583e19fd7..b47d890d4ada1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -144,6 +144,10 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); u32 enable; + /* When GPU is idle, at present no counters need to run */ + if (!gpu_active) + return false; + /* * Only some counters need the sampling timer. * @@ -157,17 +161,11 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) */ enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK; - /* -* When the GPU is idle per-engine counters do not need to be -* running so clear those bits out. -*/ - if (!gpu_active) - enable &= ~ENGINE_SAMPLE_MASK; /* * Also there is software busyness tracking available we do not * need the timer for I915_SAMPLE_BUSY counter. */ - else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) + if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS) enable &= ~BIT(I915_SAMPLE_BUSY); /* -- 2.38.0
[PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional
No functional changes but we can remove some unsightly index computation and read/write functions if we convert the PMU sample array from a one-dimensional to a two-dimensional array. Suggested-by: Tvrtko Ursulin Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_pmu.c | 60 ++--- drivers/gpu/drm/i915/i915_pmu.h | 2 +- 2 files changed, 19 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index b47d890d4ada1..137e0df9573ee 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt) return ktime_to_ns(ktime_sub(ktime_get_raw(), kt)); } -static unsigned int -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample) -{ - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample; - - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample)); - - return idx; -} - -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample) -{ - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur; -} - -static void -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val) -{ - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val; -} - -static void -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul) -{ - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul); -} - static u64 get_rc6(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt) spin_lock_irqsave(>lock, flags); if (awake) { - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val); + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val; } else { /* * We think we are runtime suspended. @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt) * counter value. */ val = ktime_since_raw(pmu->sleep_last[gt_id]); - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6); + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur; } - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED)) - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED); + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur) + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur; else - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val); + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; spin_unlock_irqrestore(>lock, flags); @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu) with_intel_runtime_pm(gt->uncore->rpm, wakeref) { u64 val = __get_rc6(gt); - store_sample(pmu, i, __I915_SAMPLE_RC6, val); - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED, -val); + pmu->sample[i][__I915_SAMPLE_RC6].cur = val; + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; pmu->sleep_last[i] = ktime_get_raw(); } } @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt) { struct i915_pmu *pmu = >i915->pmu; - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt)); + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt); pmu->sleep_last[gt->info.id] = ktime_get_raw(); } @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) } } +static void +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) +{ + sample->cur += mul_u32_u32(val, mul); +} + static bool frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt) { @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (!val) val = intel_gpu_freq(rps, rps->cur_freq); - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT, + add_sample_mult(>sample[gt_id][__I915_SAMPLE_FREQ_ACT], val, period_ns / 1000); } if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) { - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ, + add_sample_mult(>sample[gt_id][__I915_SAMPLE_FREQ_REQ], intel_rps_get_requested_frequency(rps), period_ns / 1000); } @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
[PATCH 0/2] drm/i915/pmu: couple of cleanups
Ashutosh Dixit (2): drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked drm/i915/pmu: Make PMU sample array two-dimensional drivers/gpu/drm/i915/i915_pmu.c | 72 +++-- drivers/gpu/drm/i915/i915_pmu.h | 2 +- 2 files changed, 24 insertions(+), 50 deletions(-) -- 2.38.0
[PATCH] drm/i915/perf: Clear out entire reports after reading if not power of 2 size
Clearing out report id and timestamp as means to detect unlanded reports only works if report size is power of 2. That is, only when report size is a sub-multiple of the OA buffer size can we be certain that reports will land at the same place each time in the OA buffer (after rewind). If report size is not a power of 2, we need to zero out the entire report to be able to detect unlanded reports reliably. Cc: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_perf.c | 17 +++-- 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 19d5652300eeb..58284156428dc 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, stream->oa_buffer.last_ctx_id = ctx_id; } - /* -* Clear out the report id and timestamp as a means to detect unlanded -* reports. -*/ - oa_report_id_clear(stream, report32); - oa_timestamp_clear(stream, report32); + if (is_power_of_2(report_size)) { + /* +* Clear out the report id and timestamp as a means +* to detect unlanded reports. +*/ + oa_report_id_clear(stream, report32); + oa_timestamp_clear(stream, report32); + } else { + /* Zero out the entire report */ + memset(report32, 0, report_size); + } } if (start_offset != *offset) { -- 2.38.0
[PATCH v2] drm/i915/hwmon: Silence UBSAN uninitialized bool variable warning
Loading i915 on UBSAN enabled kernels (CONFIG_UBSAN/CONFIG_UBSAN_BOOL) causes the following warning: UBSAN: invalid-load in drivers/gpu/drm/i915/gt/uc/intel_uc.c:558:2 load of value 255 is not a valid value for type '_Bool' Call Trace: dump_stack_lvl+0x57/0x7d ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value.cold+0x43/0x48 __uc_init_hw+0x76a/0x903 [i915] ... i915_driver_probe+0xfb1/0x1eb0 [i915] i915_pci_probe+0xbe/0x2d0 [i915] The warning happens because during probe i915_hwmon is still not available which results in the output boolean variable *old remaining uninitialized. Silence the warning by initializing the variable to an arbitrary value. v2: Move variable initialization to the declaration (Andi) Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 1381943b8973d..c8b9cbb7ba3a9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -465,7 +465,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; int ret, attempts; - bool pl1en; + bool pl1en = false; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); -- 2.38.0
[PATCH] drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked
pmu_needs_timer() keeps the timer running even when GT is parked, ostensibly to sample requested/actual frequencies. However frequency_sample() has the following: /* Report 0/0 (actual/requested) frequency while parked. */ if (!intel_gt_pm_get_if_awake(gt)) return; The above code prevents frequencies to be sampled while the GT is parked. So we might as well turn off the sampling timer itself in this case and save CPU cycles/power. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_pmu.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 7ece883a7d956..8db1d681cf4ab 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -124,11 +124,14 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) ENGINE_SAMPLE_MASK; /* -* When the GPU is idle per-engine counters do not need to be -* running so clear those bits out. +* When GPU is idle, frequency or per-engine counters do not need +* to be running so clear those bits out. */ - if (!gpu_active) - enable &= ~ENGINE_SAMPLE_MASK; + if (!gpu_active) { + enable &= ~(config_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_mask(I915_PMU_REQUESTED_FREQUENCY) | + ENGINE_SAMPLE_MASK); + } /* * Also there is software busyness tracking available we do not * need the timer for I915_SAMPLE_BUSY counter. -- 2.38.0
[PATCH] drm/i915/hwmon: Silence UBSAN uninitialized bool variable warning
Loading i915 on UBSAN enabled kernels (CONFIG_UBSAN/CONFIG_UBSAN_BOOL) causes the following warning: UBSAN: invalid-load in drivers/gpu/drm/i915/gt/uc/intel_uc.c:558:2 load of value 255 is not a valid value for type '_Bool' Call Trace: dump_stack_lvl+0x57/0x7d ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value.cold+0x43/0x48 __uc_init_hw+0x76a/0x903 [i915] ... i915_driver_probe+0xfb1/0x1eb0 [i915] i915_pci_probe+0xbe/0x2d0 [i915] The warning happens because during probe i915_hwmon is still not available which results in the output boolean variable *old remaining uninitialized. Silence the warning by initializing the variable to an arbitrary value. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index a3bdd9f68a458..685663861bc0b 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -502,8 +502,11 @@ void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) struct i915_hwmon *hwmon = i915->hwmon; u32 r; - if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) { + /* Fix uninitialized bool variable warning */ + *old = false; return; + } mutex_lock(>hwmon_lock); -- 2.38.0
[PATCH v6 0/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
v6: Update Patch 3 to remove the timeout when blocked v1-v5: Please see individual patches for revision history Ashutosh Dixit (3): drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write drm/i915/guc: Disable PL1 power limit when loading GuC firmware drm/i915/hwmon: Block waiting for GuC reset to complete drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++- drivers/gpu/drm/i915/i915_hwmon.c | 87 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 +++ 3 files changed, 93 insertions(+), 14 deletions(-) -- 2.38.0
[PATCH 2/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path v3 (Jani N): - Add/remove explanatory comments - Function renames - Type corrections - Locking annotation v4: - Don't hold the lock across GuC reset (Rodrigo) - New locking scheme (suggested by Rodrigo) - Eliminate rpm_get in power_max_disable/restore, not needed (Tvrtko) v5: - Fix uninitialized pl1en variable compile warning reported by kernel build robot by creating new err_rps label Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++-- drivers/gpu/drm/i915/i915_hwmon.c | 40 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 + 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cba..996168312340e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; int ret, attempts; + bool pl1en; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable a potentially low PL1 power limit to allow freq to be raised */ + i915_hwmon_power_max_disable(gt->i915, ); + intel_rps_raise_unslice(_to_gt(uc)->rps); while (attempts--) { @@ -500,7 +505,7 @@ static int __uc_init_hw(struct intel_uc *uc) */ ret = __uc_sanitize(uc); if (ret) - goto err_out; + goto err_rps; intel_huc_fw_upload(huc); intel_guc_ads_reset(guc); @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(_to_gt(uc)->rps); } + i915_hwmon_power_max_restore(gt->i915, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -559,10 +566,12 @@ static int __uc_init_hw(struct intel_uc *uc) intel_guc_submission_disable(guc); err_log_capture: __uc_capture_load_err_log(uc); -err_out: +err_rps: /* Return GT back to RPn */ intel_rps_lower_unslice(_to_gt(uc)->rps); + i915_hwmon_power_max_restore(gt->i915, pl1en); +err_out: __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 7f44e809ca155..9ab8971679fe3 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -50,6 +50,7 @@ struct hwm_drvdata { struct hwm_energy_info ei; /* Energy info for energy1_input */ char name[12]; int gt_n; + bool reset_in_progress; }; struct i915_hwmon { @@ -400,6 +401,10 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) u32 nval; mutex_lock(>hwmon_lock); + if (hwmon->ddat.reset_in_progress) { + ret = -EAGAIN; + goto unlock; + } wakeref = intel_runtime_pm_get(ddat->uncore->rpm); /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ @@ -421,6 +426,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); exit: intel_runtime_pm_put(ddat->uncore->rpm, wakeref); +unlock: mutex_unlock(>hwmon_lock); return ret; } @@ -472,6 +478,40 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) +{ + struct i915_hwmon *hwmon = i915->hwmon; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + mutex_
[PATCH 3/3] drm/i915/hwmon: Block waiting for GuC reset to complete
Instead of erroring out when GuC reset is in progress, block waiting for GuC reset to complete which is a more reasonable uapi behavior. v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo) v3: Remove timeout when blocked (Tvrtko) Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_hwmon.c | 29 + 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 9ab8971679fe3..a3bdd9f68a458 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -51,6 +51,7 @@ struct hwm_drvdata { char name[12]; int gt_n; bool reset_in_progress; + wait_queue_head_t waitq; }; struct i915_hwmon { @@ -397,14 +398,32 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; intel_wakeref_t wakeref; + DEFINE_WAIT(wait); int ret = 0; u32 nval; - mutex_lock(>hwmon_lock); - if (hwmon->ddat.reset_in_progress) { - ret = -EAGAIN; - goto unlock; + /* Block waiting for GuC reset to complete when needed */ + for (;;) { + mutex_lock(>hwmon_lock); + + prepare_to_wait(>waitq, , TASK_INTERRUPTIBLE); + + if (!hwmon->ddat.reset_in_progress) + break; + + if (signal_pending(current)) { + ret = -EINTR; + break; + } + + mutex_unlock(>hwmon_lock); + + schedule(); } + finish_wait(>waitq, ); + if (ret) + goto unlock; + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ @@ -508,6 +527,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0); hwmon->ddat.reset_in_progress = false; + wake_up_all(>ddat.waitq); mutex_unlock(>hwmon_lock); } @@ -784,6 +804,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) ddat->uncore = >uncore; snprintf(ddat->name, sizeof(ddat->name), "i915"); ddat->gt_n = -1; + init_waitqueue_head(>waitq); for_each_gt(gt, i915, i) { ddat_gt = hwmon->ddat_gt + i; -- 2.38.0
[PATCH 1/3] drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write
In preparation for follow-on patches, refactor hwm_power_max_write to take hwmon_lock and runtime pm wakeref at start of the function and release them at the end, therefore acquiring these just once each. Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_hwmon.c | 28 +++- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 8e7dccc8d3a0e..7f44e809ca155 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -396,31 +396,33 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; intel_wakeref_t wakeref; + int ret = 0; u32 nval; + mutex_lock(>hwmon_lock); + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); + /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ if (val == PL1_DISABLE) { - mutex_lock(>hwmon_lock); - with_intel_runtime_pm(ddat->uncore->rpm, wakeref) { - intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, -PKG_PWR_LIM_1_EN, 0); - nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); - } - mutex_unlock(>hwmon_lock); + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, 0); + nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); if (nval & PKG_PWR_LIM_1_EN) - return -ENODEV; - return 0; + ret = -ENODEV; + goto exit; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, - nval); - return 0; + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); +exit: + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); + mutex_unlock(>hwmon_lock); + return ret; } static int -- 2.38.0
[PATCH 3/3] drm/i915/hwmon: Block waiting for GuC reset to complete
Instead of erroring out when GuC reset is in progress, block waiting for GuC reset to complete which is a more reasonable uapi behavior. v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo) Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 38 +++ 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 9ab8971679fe3..8471a667dfc71 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -51,6 +51,7 @@ struct hwm_drvdata { char name[12]; int gt_n; bool reset_in_progress; + wait_queue_head_t waitq; }; struct i915_hwmon { @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val) static int hwm_power_max_write(struct hwm_drvdata *ddat, long val) { +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000) + + int ret = 0, timeout = GUC_RESET_TIMEOUT; struct i915_hwmon *hwmon = ddat->hwmon; intel_wakeref_t wakeref; - int ret = 0; + DEFINE_WAIT(wait); u32 nval; - mutex_lock(>hwmon_lock); - if (hwmon->ddat.reset_in_progress) { - ret = -EAGAIN; - goto unlock; + /* Block waiting for GuC reset to complete when needed */ + for (;;) { + mutex_lock(>hwmon_lock); + + prepare_to_wait(>waitq, , TASK_INTERRUPTIBLE); + + if (!hwmon->ddat.reset_in_progress) + break; + + if (signal_pending(current)) { + ret = -EINTR; + break; + } + + if (!timeout) { + ret = -ETIME; + break; + } + + mutex_unlock(>hwmon_lock); + + timeout = schedule_timeout(timeout); } + finish_wait(>waitq, ); + if (ret) + goto unlock; + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0); hwmon->ddat.reset_in_progress = false; + wake_up_all(>ddat.waitq); mutex_unlock(>hwmon_lock); } @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) ddat->uncore = >uncore; snprintf(ddat->name, sizeof(ddat->name), "i915"); ddat->gt_n = -1; + init_waitqueue_head(>waitq); for_each_gt(gt, i915, i) { ddat_gt = hwmon->ddat_gt + i; -- 2.38.0
[PATCH 1/3] drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write
In preparation for follow-on patches, refactor hwm_power_max_write to take hwmon_lock and runtime pm wakeref at start of the function and release them at the end, therefore acquiring these just once each. Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_hwmon.c | 28 +++- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 8e7dccc8d3a0e..7f44e809ca155 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -396,31 +396,33 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; intel_wakeref_t wakeref; + int ret = 0; u32 nval; + mutex_lock(>hwmon_lock); + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); + /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ if (val == PL1_DISABLE) { - mutex_lock(>hwmon_lock); - with_intel_runtime_pm(ddat->uncore->rpm, wakeref) { - intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, -PKG_PWR_LIM_1_EN, 0); - nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); - } - mutex_unlock(>hwmon_lock); + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, 0); + nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); if (nval & PKG_PWR_LIM_1_EN) - return -ENODEV; - return 0; + ret = -ENODEV; + goto exit; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, - nval); - return 0; + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); +exit: + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); + mutex_unlock(>hwmon_lock); + return ret; } static int -- 2.38.0
[PATCH 0/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
Updates to Patch 2/3 and Patch 3/3 in this version. Ashutosh Dixit (3): drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write drm/i915/guc: Disable PL1 power limit when loading GuC firmware drm/i915/hwmon: Block waiting for GuC reset to complete drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++- drivers/gpu/drm/i915/i915_hwmon.c | 94 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 ++ 3 files changed, 100 insertions(+), 14 deletions(-) -- 2.38.0
[PATCH 2/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path v3 (Jani N): - Add/remove explanatory comments - Function renames - Type corrections - Locking annotation v4: - Don't hold the lock across GuC reset (Rodrigo) - New locking scheme (suggested by Rodrigo) - Eliminate rpm_get in power_max_disable/restore, not needed (Tvrtko) v5: - Fix uninitialized pl1en variable compile warning reported by kernel build robot by creating new err_rps label Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++-- drivers/gpu/drm/i915/i915_hwmon.c | 40 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 + 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cba..996168312340e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; int ret, attempts; + bool pl1en; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable a potentially low PL1 power limit to allow freq to be raised */ + i915_hwmon_power_max_disable(gt->i915, ); + intel_rps_raise_unslice(_to_gt(uc)->rps); while (attempts--) { @@ -500,7 +505,7 @@ static int __uc_init_hw(struct intel_uc *uc) */ ret = __uc_sanitize(uc); if (ret) - goto err_out; + goto err_rps; intel_huc_fw_upload(huc); intel_guc_ads_reset(guc); @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(_to_gt(uc)->rps); } + i915_hwmon_power_max_restore(gt->i915, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -559,10 +566,12 @@ static int __uc_init_hw(struct intel_uc *uc) intel_guc_submission_disable(guc); err_log_capture: __uc_capture_load_err_log(uc); -err_out: +err_rps: /* Return GT back to RPn */ intel_rps_lower_unslice(_to_gt(uc)->rps); + i915_hwmon_power_max_restore(gt->i915, pl1en); +err_out: __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 7f44e809ca155..9ab8971679fe3 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -50,6 +50,7 @@ struct hwm_drvdata { struct hwm_energy_info ei; /* Energy info for energy1_input */ char name[12]; int gt_n; + bool reset_in_progress; }; struct i915_hwmon { @@ -400,6 +401,10 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) u32 nval; mutex_lock(>hwmon_lock); + if (hwmon->ddat.reset_in_progress) { + ret = -EAGAIN; + goto unlock; + } wakeref = intel_runtime_pm_get(ddat->uncore->rpm); /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ @@ -421,6 +426,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); exit: intel_runtime_pm_put(ddat->uncore->rpm, wakeref); +unlock: mutex_unlock(>hwmon_lock); return ret; } @@ -472,6 +478,40 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) +{ + struct i915_hwmon *hwmon = i915->hwmon; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + mutex_
[PATCH 2/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path v3 (Jani N): - Add/remove explanatory comments - Function renames - Type corrections - Locking annotation v4: - Don't hold the lock across GuC reset (Rodrigo) - New locking scheme (suggested by Rodrigo) - Eliminate rpm_get in power_max_disable/restore, not needed (Tvrtko) Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 ++ drivers/gpu/drm/i915/i915_hwmon.c | 40 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 + 3 files changed, 56 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cba..aa8e35a5636a0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; int ret, attempts; + bool pl1en; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable a potentially low PL1 power limit to allow freq to be raised */ + i915_hwmon_power_max_disable(gt->i915, ); + intel_rps_raise_unslice(_to_gt(uc)->rps); while (attempts--) { @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(_to_gt(uc)->rps); } + i915_hwmon_power_max_restore(gt->i915, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* Return GT back to RPn */ intel_rps_lower_unslice(_to_gt(uc)->rps); + i915_hwmon_power_max_restore(gt->i915, pl1en); + __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 7f44e809ca155..9ab8971679fe3 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -50,6 +50,7 @@ struct hwm_drvdata { struct hwm_energy_info ei; /* Energy info for energy1_input */ char name[12]; int gt_n; + bool reset_in_progress; }; struct i915_hwmon { @@ -400,6 +401,10 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) u32 nval; mutex_lock(>hwmon_lock); + if (hwmon->ddat.reset_in_progress) { + ret = -EAGAIN; + goto unlock; + } wakeref = intel_runtime_pm_get(ddat->uncore->rpm); /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ @@ -421,6 +426,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); exit: intel_runtime_pm_put(ddat->uncore->rpm, wakeref); +unlock: mutex_unlock(>hwmon_lock); return ret; } @@ -472,6 +478,40 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) +{ + struct i915_hwmon *hwmon = i915->hwmon; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + mutex_lock(>hwmon_lock); + + hwmon->ddat.reset_in_progress = true; + r = intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, 0); + *old = !!(r & PKG_PWR_LIM_1_EN); + + mutex_unlock(>hwmon_lock); +} + +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) +{ + struct i915_hwmon *hwmon = i915->hwmon; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + mutex_lock(>hwmon_lock); + + intel_uncore_rmw(hwmon->ddat.uncore, hwm
[PATCH 1/3] drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write
In preparation for follow-on patches, refactor hwm_power_max_write to take hwmon_lock and runtime pm wakeref at start of the function and release them at the end, therefore acquiring these just once each. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 28 +++- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 8e7dccc8d3a0e..7f44e809ca155 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -396,31 +396,33 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; intel_wakeref_t wakeref; + int ret = 0; u32 nval; + mutex_lock(>hwmon_lock); + wakeref = intel_runtime_pm_get(ddat->uncore->rpm); + /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ if (val == PL1_DISABLE) { - mutex_lock(>hwmon_lock); - with_intel_runtime_pm(ddat->uncore->rpm, wakeref) { - intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, -PKG_PWR_LIM_1_EN, 0); - nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); - } - mutex_unlock(>hwmon_lock); + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, 0); + nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); if (nval & PKG_PWR_LIM_1_EN) - return -ENODEV; - return 0; + ret = -ENODEV; + goto exit; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, - nval); - return 0; + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); +exit: + intel_runtime_pm_put(ddat->uncore->rpm, wakeref); + mutex_unlock(>hwmon_lock); + return ret; } static int -- 2.38.0
[PATCH 3/3] drm/i915/hwmon: Block waiting for GuC reset to complete
Instead of erroring out when GuC reset is in progress, block waiting for GuC reset to complete which is a more reasonable uapi behavior. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 9ab8971679fe3..4343efb48e61b 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -51,6 +51,7 @@ struct hwm_drvdata { char name[12]; int gt_n; bool reset_in_progress; + wait_queue_head_t wqh; }; struct i915_hwmon { @@ -400,10 +401,15 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) int ret = 0; u32 nval; +retry: mutex_lock(>hwmon_lock); if (hwmon->ddat.reset_in_progress) { - ret = -EAGAIN; - goto unlock; + mutex_unlock(>hwmon_lock); + ret = wait_event_interruptible(ddat->wqh, + !hwmon->ddat.reset_in_progress); + if (ret) + return ret; + goto retry; } wakeref = intel_runtime_pm_get(ddat->uncore->rpm); @@ -426,7 +432,6 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval); exit: intel_runtime_pm_put(ddat->uncore->rpm, wakeref); -unlock: mutex_unlock(>hwmon_lock); return ret; } @@ -508,6 +513,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0); hwmon->ddat.reset_in_progress = false; + wake_up_all(>ddat.wqh); mutex_unlock(>hwmon_lock); } @@ -784,6 +790,7 @@ void i915_hwmon_register(struct drm_i915_private *i915) ddat->uncore = >uncore; snprintf(ddat->name, sizeof(ddat->name), "i915"); ddat->gt_n = -1; + init_waitqueue_head(>wqh); for_each_gt(gt, i915, i) { ddat_gt = hwmon->ddat_gt + i; -- 2.38.0
[PATCH v4 0/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
Split the v3 patch into 3 patches for easier review, can squash later if needed. Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Ashutosh Dixit (3): drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write drm/i915/guc: Disable PL1 power limit when loading GuC firmware drm/i915/hwmon: Block waiting for GuC reset to complete drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 drivers/gpu/drm/i915/i915_hwmon.c | 75 ++- drivers/gpu/drm/i915/i915_hwmon.h | 7 +++ 3 files changed, 78 insertions(+), 13 deletions(-) -- 2.38.0
[PATCH v3] drm/i915/hwmon: Use 0 to designate disabled PL1 power limit
On ATSM the PL1 limit is disabled at power up. The previous uapi assumed that the PL1 limit is always enabled and therefore did not have a notion of a disabled PL1 limit. This results in erroneous PL1 limit values when the PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit was previously shown as 0 which means a low PL1 limit whereas the limit being disabled actually implies a high effective PL1 limit value. To get round this problem, the PL1 limit uapi is expanded to include a special value 0 to designate a disabled PL1 limit. A read value of 0 means that the PL1 power limit is disabled, writing 0 disables the limit. The link between this patch and the bugs mentioned below is as follows: * Because on ATSM the PL1 power limit is disabled on power up and there were no means to enable it, we previously implemented the means to enable the limit when the PL1 hwmon entry (power1_max) was written to. * Now there is a IGT igt@i915_hwmon@hwmon_write which (a) reads orig value from all hwmon sysfs (b) does a bunch of random writes and finally (c) restores the orig value read. On ATSM since the orig value is 0, when the IGT restores the 0 value, the PL1 limit is now enabled with a value of 0. * PL1 limit of 0 implies a low PL1 limit which causes GPU freq to fall to 100 MHz. This causes GuC FW load and several IGT's to start timing out and gives rise to these Intel CI bugs. After this patch, writing 0 would disable the PL1 limit instead of enabling it, avoiding the freq drop issue. v2: Add explanation for bugs mentioned below (Rodrigo) v3: Eliminate race during PL1 disable and verify (Tvrtko) Change return to -ENODEV if verify fails (Tvrtko) Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8060 Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 4 ++- drivers/gpu/drm/i915/i915_hwmon.c | 26 +++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon index 2d6a472eef885..8d7d8f05f6cd0 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -14,7 +14,9 @@ Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts. The power controller will throttle the operating frequency if the power averaged over a window (typically seconds) - exceeds this limit. + exceeds this limit. A read value of 0 means that the PL1 + power limit is disabled, writing 0 disables the + limit. Writing values > 0 will enable the power limit. Only supported for particular Intel i915 graphics platforms. diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 596dd2c070106..8e7dccc8d3a0e 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -349,6 +349,8 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan) } } +#define PL1_DISABLE 0 + /* * HW allows arbitrary PL1 limits to be set but silently clamps these values to * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the @@ -362,6 +364,14 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val) intel_wakeref_t wakeref; u64 r, min, max; + /* Check if PL1 limit is disabled */ + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); + if (!(r & PKG_PWR_LIM_1_EN)) { + *val = PL1_DISABLE; + return 0; + } + *val = hwm_field_read_and_scale(ddat, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1, @@ -385,8 +395,24 @@ static int hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; u32 nval; + /* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */ + if (val == PL1_DISABLE) { + mutex_lock(>hwmon_lock); + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) { + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, 0); + nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); + } + mutex_unlock(>hwmon_lock); + + if (nval & PKG_PWR_LIM_1_EN) + return -ENODEV; + return 0; + } + /* Computation
[PATCH v2] drm/i915/hwmon: Use 0 to designate disabled PL1 power limit
On ATSM the PL1 limit is disabled at power up. The previous uapi assumed that the PL1 limit is always enabled and therefore did not have a notion of a disabled PL1 limit. This results in erroneous PL1 limit values when the PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit was previously shown as 0 which means a low PL1 limit whereas the limit being disabled actually implies a high effective PL1 limit value. To get round this problem, the PL1 limit uapi is expanded to include a special value 0 to designate a disabled PL1 limit. A read value of 0 means that the PL1 power limit is disabled, writing 0 disables the limit. The link between this patch and the bugs mentioned below is as follows: * Because on ATSM the PL1 power limit is disabled on power up and there were no means to enable it, we previously implemented the means to enable the limit when the PL1 hwmon entry (power1_max) was written to. * Now there is a IGT igt@i915_hwmon@hwmon_write which (a) reads orig value from all hwmon sysfs (b) does a bunch of random writes and finally (c) restores the orig value read. On ATSM since the orig value is 0, when the IGT restores the 0 value, the PL1 limit is now enabled with a value of 0. * PL1 limit of 0 implies a low PL1 limit which causes GPU freq to fall to 100 MHz. This causes GuC FW load and several IGT's to start timing out and gives rise to these Intel CI bugs. After this patch, writing 0 would disable the PL1 limit instead of enabling it, avoiding the freq drop issue. v2: Add explanation for bugs mentioned below (Rodrigo) Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8060 Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 4 +++- drivers/gpu/drm/i915/i915_hwmon.c | 24 +++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon index 2d6a472eef885..8d7d8f05f6cd0 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -14,7 +14,9 @@ Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts. The power controller will throttle the operating frequency if the power averaged over a window (typically seconds) - exceeds this limit. + exceeds this limit. A read value of 0 means that the PL1 + power limit is disabled, writing 0 disables the + limit. Writing values > 0 will enable the power limit. Only supported for particular Intel i915 graphics platforms. diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 596dd2c070106..c099057888914 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -349,6 +349,8 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan) } } +#define PL1_DISABLE 0 + /* * HW allows arbitrary PL1 limits to be set but silently clamps these values to * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the @@ -362,6 +364,14 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val) intel_wakeref_t wakeref; u64 r, min, max; + /* Check if PL1 limit is disabled */ + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); + if (!(r & PKG_PWR_LIM_1_EN)) { + *val = PL1_DISABLE; + return 0; + } + *val = hwm_field_read_and_scale(ddat, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1, @@ -385,8 +395,22 @@ static int hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; u32 nval; + if (val == PL1_DISABLE) { + /* Disable PL1 limit */ + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, 0); + + /* Verify, because PL1 limit cannot be disabled on all platforms */ + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); + if (nval & PKG_PWR_LIM_1_EN) + return -EPERM; + return 0; + } + /* Computation in 64-bits to avoid overflow. Round to nearest. */ nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); -- 2.38.0
[PATCH] drm/i915/hwmon: Use 0 to designate disabled PL1 power limit
On ATSM the PL1 limit is disabled at power up. The previous uapi assumed that the PL1 limit is always enabled and therefore did not have a notion of a disabled PL1 limit. This results in erroneous PL1 limit values when the PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit was previously shown as 0 which means a low PL1 limit whereas the limit being disabled actually implies a high effective PL1 limit value. To get round this problem, the PL1 limit uapi is expanded to include a special value 0 to designate a disabled PL1 limit. Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/8060 Signed-off-by: Ashutosh Dixit --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 3 ++- drivers/gpu/drm/i915/i915_hwmon.c | 24 +++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon index 2d6a472eef885..96fec0bb74c2c 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -14,7 +14,8 @@ Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts. The power controller will throttle the operating frequency if the power averaged over a window (typically seconds) - exceeds this limit. + exceeds this limit. A read value of 0 means that the PL1 power + limit is disabled. Writing 0 disables the limit if possible. Only supported for particular Intel i915 graphics platforms. diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 596dd2c070106..c099057888914 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -349,6 +349,8 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan) } } +#define PL1_DISABLE 0 + /* * HW allows arbitrary PL1 limits to be set but silently clamps these values to * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the @@ -362,6 +364,14 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val) intel_wakeref_t wakeref; u64 r, min, max; + /* Check if PL1 limit is disabled */ + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); + if (!(r & PKG_PWR_LIM_1_EN)) { + *val = PL1_DISABLE; + return 0; + } + *val = hwm_field_read_and_scale(ddat, hwmon->rg.pkg_rapl_limit, PKG_PWR_LIM_1, @@ -385,8 +395,22 @@ static int hwm_power_max_write(struct hwm_drvdata *ddat, long val) { struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; u32 nval; + if (val == PL1_DISABLE) { + /* Disable PL1 limit */ + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, 0); + + /* Verify, because PL1 limit cannot be disabled on all platforms */ + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + nval = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); + if (nval & PKG_PWR_LIM_1_EN) + return -EPERM; + return 0; + } + /* Computation in 64-bits to avoid overflow. Round to nearest. */ nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); -- 2.38.0
[PATCH] Revert "drm/i915/hwmon: Enable PL1 power limit"
This reverts commit ee892ea83d99610fa33bea612de058e0955eec3a. 0349c41b0596 ("drm/i915/hwmon: Enable PL1 power limit") was reverted in 05d5562e401e ("Revert "drm/i915/hwmon: Enable PL1 power limit"") but has appeared again as ee892ea83d99 ("drm/i915/hwmon: Enable PL1 power limit"). Revert it again. Cc: # v6.2+ Cc: Jani Nikula Cc: Rodrigo Vivi Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Fixes: ee892ea83d99 ("drm/i915/hwmon: Enable PL1 power limit") Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 5 - 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index ee63a8fd88fc1..596dd2c070106 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -688,11 +688,6 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(>ddat_gt[i], ); } - - /* Enable PL1 power limit */ - if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) -- 2.38.0
[PATCH] Revert "drm/i915/hwmon: Enable PL1 power limit"
This reverts commit ee892ea83d99610fa33bea612de058e0955eec3a. 0349c41b0596 ("drm/i915/hwmon: Enable PL1 power limit") was reverted in 05d5562e401e ("Revert "drm/i915/hwmon: Enable PL1 power limit"") but has appeared again as ee892ea83d99 ("drm/i915/hwmon: Enable PL1 power limit"). Revert it again. Cc: Jani Nikula Cc: Rodrigo Vivi Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Fixes: ee892ea83d99 ("drm/i915/hwmon: Enable PL1 power limit") Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 5 - 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index ee63a8fd88fc1..596dd2c070106 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -688,11 +688,6 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(>ddat_gt[i], ); } - - /* Enable PL1 power limit */ - if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) -- 2.38.0
[PATCH] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path v3 (Jani N): - Add/remove explanatory comments - Function renames - Type corrections - Locking annotation Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 +++ drivers/gpu/drm/i915/i915_hwmon.c | 39 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 + 3 files changed, 55 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cba..aa8e35a5636a0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; int ret, attempts; + bool pl1en; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable a potentially low PL1 power limit to allow freq to be raised */ + i915_hwmon_power_max_disable(gt->i915, ); + intel_rps_raise_unslice(_to_gt(uc)->rps); while (attempts--) { @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(_to_gt(uc)->rps); } + i915_hwmon_power_max_restore(gt->i915, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* Return GT back to RPn */ intel_rps_lower_unslice(_to_gt(uc)->rps); + i915_hwmon_power_max_restore(gt->i915, pl1en); + __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index ee63a8fd88fc1..769b5bda4d53f 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) + __acquires(i915->hwmon->hwmon_lock) +{ + struct i915_hwmon *hwmon = i915->hwmon; + intel_wakeref_t wakeref; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + /* Take mutex to prevent concurrent hwm_power_max_write */ + mutex_lock(>hwmon_lock); + + with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref) + r = intel_uncore_rmw(hwmon->ddat.uncore, +hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, 0); + + *old = !!(r & PKG_PWR_LIM_1_EN); +} + +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) + __releases(i915->hwmon->hwmon_lock) +{ + struct i915_hwmon *hwmon = i915->hwmon; + intel_wakeref_t wakeref; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref) + intel_uncore_rmw(hwmon->ddat.uncore, +hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, +old ? PKG_PWR_LIM_1_EN : 0); + + mutex_unlock(>hwmon_lock); +} + static umode_t hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h index 7ca9cf2c34c96..0fcb7de844061 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.h +++ b/drivers/gpu/drm/i915/i915_hwmon.h @@ -7,14 +7,21 @@ #ifndef __I915_HWMON_H__ #define __I915_HWMON_H__ +#include + struct drm_i915_private; +struct intel_gt; #if IS_REACHABLE(CONFIG_HWMON) void i915_hwmon_register(struct drm_i915_private *i915); void i915_hwmon_unregister(struct drm_i915_pri
[PATCH] drm/i915/pmu: Use functions common with sysfs to read actual freq
Expose intel_rps_read_actual_frequency_fw to read the actual freq without taking forcewake for use by PMU. The code is refactored to use a common set of functions across sysfs and PMU. Using common functions with sysfs in PMU solves the issues of missing support for MTL and missing support for older generations (prior to Gen6). It also future proofs the PMU where sometimes code has been updated for sysfs and PMU has been missed. v2: Remove runtime_pm_if_in_use from read_actual_frequency_fw (Tvrtko) v3: (Tvrtko) - Remove goto in __read_cagf - Unexport intel_rps_get_cagf and intel_rps_read_punit_req Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8280 Signed-off-by: Ashutosh Dixit Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_rps.c | 38 - drivers/gpu/drm/i915/gt/intel_rps.h | 4 +-- drivers/gpu/drm/i915/i915_pmu.c | 10 +++- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 4d0dc9de23f96..6d7395aa404a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) -{ - struct drm_i915_private *i915 = rps_to_i915(rps); - i915_reg_t rpstat; - - rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; - - return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); -} - u32 intel_rps_read_rpstat(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -2066,7 +2056,7 @@ u32 intel_rps_read_rpstat(struct intel_rps *rps) return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); } -u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) +static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; @@ -2089,10 +2079,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) return cagf; } -static u32 read_cagf(struct intel_rps *rps) +static u32 __read_cagf(struct intel_rps *rps, bool take_fw) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + i915_reg_t r = INVALID_MMIO_REG; u32 freq; /* @@ -2100,22 +2091,30 @@ static u32 read_cagf(struct intel_rps *rps) * registers will return 0 freq when GT is in RC6 */ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { - freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + r = MTL_MIRROR_TARGET_WP1; } else if (GRAPHICS_VER(i915) >= 12) { - freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + r = GEN12_RPSTAT1; } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); } else if (GRAPHICS_VER(i915) >= 6) { - freq = intel_uncore_read(uncore, GEN6_RPSTAT1); + r = GEN6_RPSTAT1; } else { - freq = intel_uncore_read(uncore, MEMSTAT_ILK); + r = MEMSTAT_ILK; } + if (i915_mmio_reg_valid(r)) + freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r); + return intel_rps_get_cagf(rps, freq); } +static u32 read_cagf(struct intel_rps *rps) +{ + return __read_cagf(rps, true); +} + u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; @@ -2128,7 +2127,12 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } -u32 intel_rps_read_punit_req(struct intel_rps *rps) +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) +{ + return intel_gpu_freq(rps, __read_cagf(rps, false)); +} + +static u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index c622962c6befb..a3fa987aa91f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -37,8 +37,8 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive); int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); -u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_re
[PATCH v2] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when allowed by HW when loading GuC firmware. v2: - Take mutex (to disallow writes to power1_max) across GuC reset/fw load - Add hwm_power_max_restore to error return code path Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 10 ++- drivers/gpu/drm/i915/i915_hwmon.c | 39 +++ drivers/gpu/drm/i915/i915_hwmon.h | 7 + 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4ccb4be4c9cb..15f8e94edc61 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -460,7 +461,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct drm_i915_private *i915 = gt->i915; struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; - int ret, attempts; + int ret, attempts, pl1en; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +492,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable PL1 limit before raising freq */ + hwm_power_max_disable(gt, ); + intel_rps_raise_unslice(_to_gt(uc)->rps); while (attempts--) { @@ -547,6 +551,8 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(_to_gt(uc)->rps); } + hwm_power_max_restore(gt, pl1en); /* Restore PL1 limit */ + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); @@ -563,6 +569,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* Return GT back to RPn */ intel_rps_lower_unslice(_to_gt(uc)->rps); + hwm_power_max_restore(gt, pl1en); /* Restore PL1 limit */ + __uc_sanitize(uc); if (!ret) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index ee63a8fd88fc..2bbca75ac477 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void hwm_power_max_disable(struct intel_gt *gt, u32 *old) +{ + struct i915_hwmon *hwmon = gt->i915->hwmon; + intel_wakeref_t wakeref; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + /* Take mutex to prevent concurrent hwm_power_max_write */ + mutex_lock(>hwmon_lock); + + with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref) + r = intel_uncore_rmw(hwmon->ddat.uncore, +hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, 0); + + *old = !!(r & PKG_PWR_LIM_1_EN); + + /* hwmon_lock mutex is unlocked in hwm_power_max_restore */ +} + +void hwm_power_max_restore(struct intel_gt *gt, u32 old) +{ + struct i915_hwmon *hwmon = gt->i915->hwmon; + intel_wakeref_t wakeref; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref) + intel_uncore_rmw(hwmon->ddat.uncore, +hwmon->rg.pkg_rapl_limit, +PKG_PWR_LIM_1_EN, +old ? PKG_PWR_LIM_1_EN : 0); + + mutex_unlock(>hwmon_lock); +} + static umode_t hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h index 7ca9cf2c34c9..0c2db11be2e2 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.h +++ b/drivers/gpu/drm/i915/i915_hwmon.h @@ -7,14 +7,21 @@ #ifndef __I915_HWMON_H__ #define __I915_HWMON_H__ +#include + struct drm_i915_private; +struct intel_gt; #if IS_REACHABLE(CONFIG_HWMON) void i915_hwmon_register(struct drm_i915_private *i915); void i915_hwmon_unregister(struct drm_i915_private *i915); +void hwm_power_max_disable(struct intel_gt *gt, u32 *old); +void hwm_power_max_restore(struct intel_
[PATCH] drm/i915/guc: Disable PL1 power limit when loading GuC firmware
On dGfx, the PL1 power limit being enabled and set to a low value results in a low GPU operating freq. It also negates the freq raise operation which is done before GuC firmware load. As a result GuC firmware load can time out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power limit was enabled and set to a low value). Therefore disable the PL1 power limit when possible when loading GuC firmware. Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 ++- drivers/gpu/drm/i915/i915_hwmon.c | 34 +-- drivers/gpu/drm/i915/i915_hwmon.h | 7 ++ 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 1b7ecd384a79..8794d54500d7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -18,6 +18,7 @@ #include "intel_uc.h" #include "i915_drv.h" +#include "i915_hwmon.h" static const struct intel_uc_ops uc_ops_off; static const struct intel_uc_ops uc_ops_on; @@ -460,7 +461,7 @@ static int __uc_init_hw(struct intel_uc *uc) struct drm_i915_private *i915 = gt->i915; struct intel_guc *guc = >guc; struct intel_huc *huc = >huc; - int ret, attempts; + int ret, attempts, pl1en; GEM_BUG_ON(!intel_uc_supports_guc(uc)); GEM_BUG_ON(!intel_uc_wants_guc(uc)); @@ -491,6 +492,9 @@ static int __uc_init_hw(struct intel_uc *uc) else attempts = 1; + /* Disable PL1 limit before raising freq when possible */ + hwm_power_max_disable(gt, ); + intel_rps_raise_unslice(_to_gt(uc)->rps); while (attempts--) { @@ -544,6 +548,9 @@ static int __uc_init_hw(struct intel_uc *uc) intel_rps_lower_unslice(_to_gt(uc)->rps); } + /* Restore PL1 limit */ + hwm_power_max_restore(gt, pl1en); + guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc))); guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc))); diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index ee63a8fd88fc..4ce3da7b7adc 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -62,20 +62,23 @@ struct i915_hwmon { int scl_shift_time; }; -static void +static u32 hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat, i915_reg_t reg, u32 clear, u32 set) { struct i915_hwmon *hwmon = ddat->hwmon; struct intel_uncore *uncore = ddat->uncore; intel_wakeref_t wakeref; + u32 old; mutex_lock(>hwmon_lock); with_intel_runtime_pm(uncore->rpm, wakeref) - intel_uncore_rmw(uncore, reg, clear, set); + old = intel_uncore_rmw(uncore, reg, clear, set); mutex_unlock(>hwmon_lock); + + return old; } /* @@ -444,6 +447,33 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) } } +void hwm_power_max_disable(struct intel_gt *gt, u32 *old) +{ + struct i915_hwmon *hwmon = gt->i915->hwmon; + u32 r; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + r = hwm_locked_with_pm_intel_uncore_rmw(>ddat, + hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, 0); + *old = !!(r & PKG_PWR_LIM_1_EN); +} + +void hwm_power_max_restore(struct intel_gt *gt, u32 old) +{ + struct i915_hwmon *hwmon = gt->i915->hwmon; + + if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + return; + + hwm_locked_with_pm_intel_uncore_rmw(>ddat, + hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, + old ? PKG_PWR_LIM_1_EN : 0); +} + static umode_t hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr) { diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h index 7ca9cf2c34c9..0c2db11be2e2 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.h +++ b/drivers/gpu/drm/i915/i915_hwmon.h @@ -7,14 +7,21 @@ #ifndef __I915_HWMON_H__ #define __I915_HWMON_H__ +#include + struct drm_i915_private; +struct intel_gt; #if IS_REACHABLE(CONFIG_HWMON) void i915_hwmon_register(struct drm_i915_private *i915); void i915_hwmon_unregister(struct drm_i915_private *i915); +void hwm_power_max_disable(struct intel_gt *gt, u32 *old); +void hwm_power_max_restore(struct intel_gt *gt, u32 old); #else static inline void i915_hwmon_register(struct drm_i915_private *i9
[PATCH 2/2] drm/i915/pmu: Remove fallback to requested freq for SLPC
The fallback to requested freq does not work for SLPC because SLPC does not use 'struct intel_rps'. Also for SLPC requested freq can only be obtained from a hw register after acquiring forcewake which we don't want to do for PMU. Therefore remove fallback to requested freq for SLPC. The actual freq will be 0 when gt is in RC6 which is correct. Also this is rare since PMU freq sampling happens only when gt is unparked. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_pmu.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 7ece883a7d95..f697fabed64a 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -393,7 +393,14 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * frequency. Fortunately, the read should rarely fail! */ val = intel_rps_read_actual_frequency_fw(rps); - if (!val) + + /* +* SLPC does not use 'struct intel_rps'. Also for SLPC +* requested freq can only be obtained after acquiring +* forcewake and reading a hw register. For SLPC just +* let val be 0 +*/ + if (!val && !intel_uc_uses_guc_slpc(>uc)) val = intel_gpu_freq(rps, rps->cur_freq); add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT], -- 2.38.0
[PATCH 1/2] drm/i915/pmu: Use functions common with sysfs to read actual freq
Expose intel_rps_read_actual_frequency_fw to read the actual freq without taking forcewake for use by PMU. The code is refactored to use a common set of functions across sysfs and PMU. Using common functions with sysfs in PMU solves the issues of missing support for MTL and missing support for older generations (prior to Gen6). It also future proofs the PMU where sometimes code has been updated for sysfs and PMU has been missed. v2: Remove runtime_pm_if_in_use from read_actual_frequency_fw (Tvrtko) Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8280 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_rps.c | 34 - drivers/gpu/drm/i915/gt/intel_rps.h | 2 +- drivers/gpu/drm/i915/i915_pmu.c | 10 - 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 4d0dc9de23f9..9d9ac35691fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) -{ - struct drm_i915_private *i915 = rps_to_i915(rps); - i915_reg_t rpstat; - - rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; - - return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); -} - u32 intel_rps_read_rpstat(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -2089,10 +2079,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) return cagf; } -static u32 read_cagf(struct intel_rps *rps) +static u32 __read_cagf(struct intel_rps *rps, bool take_fw) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + i915_reg_t r = INVALID_MMIO_REG; u32 freq; /* @@ -2100,22 +2091,30 @@ static u32 read_cagf(struct intel_rps *rps) * registers will return 0 freq when GT is in RC6 */ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { - freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + r = MTL_MIRROR_TARGET_WP1; } else if (GRAPHICS_VER(i915) >= 12) { - freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + r = GEN12_RPSTAT1; } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); + goto exit; } else if (GRAPHICS_VER(i915) >= 6) { - freq = intel_uncore_read(uncore, GEN6_RPSTAT1); + r = GEN6_RPSTAT1; } else { - freq = intel_uncore_read(uncore, MEMSTAT_ILK); + r = MEMSTAT_ILK; } + freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r); +exit: return intel_rps_get_cagf(rps, freq); } +static u32 read_cagf(struct intel_rps *rps) +{ + return __read_cagf(rps, true); +} + u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; @@ -2128,6 +2127,11 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) +{ + return intel_gpu_freq(rps, __read_cagf(rps, false)); +} + u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index c622962c6bef..2d5b3ef58606 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -39,6 +39,7 @@ int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_requested_frequency(struct intel_rps *rps); u32 intel_rps_get_min_frequency(struct intel_rps *rps); u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); @@ -52,7 +53,6 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_rpstat(struct intel_rps *rps); -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps); void intel_rps_raise_unslice(struct intel_rps *rps); void intel_rps_lower_unslice(struct intel_rps *rps); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/dri
[PATCH 0/2] drm/i915/pmu: Use common freq functions with sysfs
Expose intel_rps_read_actual_frequency_fw to read the actual freq without taking forcewake for use by PMU. The code is refactored to use a common set of functions across sysfs and PMU. Using common functions with sysfs in PMU solves the issues of missing support for MTL and missing support for older generations (prior to Gen6). It also future proofs the PMU where sometimes code has been updated for sysfs and PMU has been missed. Ashutosh Dixit (2): drm/i915/pmu: Use functions common with sysfs to read actual freq drm/i915/pmu: Remove fallback to requested freq for SLPC drivers/gpu/drm/i915/gt/intel_rps.c | 34 - drivers/gpu/drm/i915/gt/intel_rps.h | 2 +- drivers/gpu/drm/i915/i915_pmu.c | 17 ++- 3 files changed, 31 insertions(+), 22 deletions(-) -- 2.38.0
[PATCH 1/2] drm/i915/pmu: Use functions common with sysfs to read actual freq
Expose intel_rps_read_actual_frequency_fw to read the actual freq without taking forcewake for use by PMU. The code is refactored to use a common set of functions across sysfs and PMU. Using common functions with sysfs in PMU solves the issues of missing support for MTL and missing support for older generations (prior to Gen6). It also future proofs the PMU where sometimes code has been updated for sysfs and PMU has been missed. Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8280 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_rps.c | 46 +++-- drivers/gpu/drm/i915/gt/intel_rps.h | 2 +- drivers/gpu/drm/i915/i915_pmu.c | 10 +++ 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 4d0dc9de23f9..3957c5ee5cba 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) -{ - struct drm_i915_private *i915 = rps_to_i915(rps); - i915_reg_t rpstat; - - rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; - - return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); -} - u32 intel_rps_read_rpstat(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); @@ -2089,10 +2079,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) return cagf; } -static u32 read_cagf(struct intel_rps *rps) +static u32 __read_cagf(struct intel_rps *rps, bool take_fw) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + i915_reg_t r = INVALID_MMIO_REG; u32 freq; /* @@ -2100,22 +2091,30 @@ static u32 read_cagf(struct intel_rps *rps) * registers will return 0 freq when GT is in RC6 */ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { - freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + r = MTL_MIRROR_TARGET_WP1; } else if (GRAPHICS_VER(i915) >= 12) { - freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + r = GEN12_RPSTAT1; } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); + goto exit; } else if (GRAPHICS_VER(i915) >= 6) { - freq = intel_uncore_read(uncore, GEN6_RPSTAT1); + r = GEN6_RPSTAT1; } else { - freq = intel_uncore_read(uncore, MEMSTAT_ILK); + r = MEMSTAT_ILK; } + freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r); +exit: return intel_rps_get_cagf(rps, freq); } +static u32 read_cagf(struct intel_rps *rps) +{ + return __read_cagf(rps, true); +} + u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; @@ -2128,6 +2127,23 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } +static u32 read_cagf_fw(struct intel_rps *rps) +{ + return __read_cagf(rps, false); +} + +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) +{ + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; + + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_gpu_freq(rps, read_cagf_fw(rps)); + + return freq; +} + u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index c622962c6bef..2d5b3ef58606 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -39,6 +39,7 @@ int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_requested_frequency(struct intel_rps *rps); u32 intel_rps_get_min_frequency(struct intel_rps *rps); u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); @@ -52,7 +53,6 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_rpstat(struct intel_rps *rps); -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); void gen6_rps_get_
[PATCH 0/2] drm/i915/pmu: Use common freq functions with sysfs
Expose intel_rps_read_actual_frequency_fw to read the actual freq without taking forcewake for use by PMU. The code is refactored to use a common set of functions across sysfs and PMU. Using common functions with sysfs in PMU solves the issues of missing support for MTL and missing support for older generations (prior to Gen6). It also future proofs the PMU where sometimes code has been updated for sysfs and PMU has been missed. Ashutosh Dixit (2): drm/i915/pmu: Use functions common with sysfs to read actual freq drm/i915/pmu: Remove fallback to requested freq for SLPC drivers/gpu/drm/i915/gt/intel_rps.c | 46 +++-- drivers/gpu/drm/i915/gt/intel_rps.h | 2 +- drivers/gpu/drm/i915/i915_pmu.c | 17 +++ 3 files changed, 43 insertions(+), 22 deletions(-) -- 2.38.0
[PATCH 2/2] drm/i915/pmu: Remove fallback to requested freq for SLPC
The fallback to requested freq does not work for SLPC because SLPC does not use 'struct intel_rps'. Also for SLPC requested freq can only be obtained from a hw register after acquiring forcewake which we don't want to do for PMU. Therefore remove fallback to requested freq for SLPC. The actual freq will be 0 when gt is in RC6 which is correct. Also this is rare since PMU freq sampling happens only when gt is unparked. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_pmu.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 7ece883a7d95..f697fabed64a 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -393,7 +393,14 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * frequency. Fortunately, the read should rarely fail! */ val = intel_rps_read_actual_frequency_fw(rps); - if (!val) + + /* +* SLPC does not use 'struct intel_rps'. Also for SLPC +* requested freq can only be obtained after acquiring +* forcewake and reading a hw register. For SLPC just +* let val be 0 +*/ + if (!val && !intel_uc_uses_guc_slpc(>uc)) val = intel_gpu_freq(rps, rps->cur_freq); add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT], -- 2.38.0
[PATCH 0/3] drm/i915/pmu: Use common freq functions with sysfs
Using common freq functions with sysfs in PMU (but without taking forcewake) solves the following issues (a) missing support for MTL (b) missing support for older generations (prior to Gen6) (c) missing support for slpc when freq sampling has to fall back to requested freq. It also makes the PMU code future proof where sometimes code has been updated for sysfs and PMU has been missed. Ashutosh Dixit (3): drm/i915/rps: Expose read_actual_frequency_fw for PMU drm/i915/rps: Expose get_requested_frequency_fw for PMU drm/i915/pmu: Use common freq functions with sysfs drivers/gpu/drm/i915/gt/intel_rps.c | 68 + drivers/gpu/drm/i915/gt/intel_rps.h | 4 +- drivers/gpu/drm/i915/i915_pmu.c | 10 ++--- 3 files changed, 56 insertions(+), 26 deletions(-) -- 2.38.0
[PATCH 2/3] drm/i915/rps: Expose get_requested_frequency_fw for PMU
Expose intel_rps_get_requested_frequency_fw to read the requested freq without taking forcewake. This is done for use by PMU which does not take forcewake when reading freq. The code is refactored to use a common set of functions across sysfs and PMU. It also allows PMU to support both host turbo (rps) and slpc which was previously missed due to the non-use of common functions across sysfs and PMU. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_rps.c | 22 +++--- drivers/gpu/drm/i915/gt/intel_rps.h | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 0a8e24bcb874..49df31927c0e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2154,7 +2154,7 @@ u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) return freq; } -u32 intel_rps_read_punit_req(struct intel_rps *rps) +static u32 intel_rps_read_punit_req(struct intel_rps *rps, bool take_fw) { struct intel_uncore *uncore = rps_to_uncore(rps); struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; @@ -2162,7 +2162,8 @@ u32 intel_rps_read_punit_req(struct intel_rps *rps) u32 freq = 0; with_intel_runtime_pm_if_in_use(rpm, wakeref) - freq = intel_uncore_read(uncore, GEN6_RPNSWREQ); + freq = take_fw ? intel_uncore_read(uncore, GEN6_RPNSWREQ) : + intel_uncore_read_fw(uncore, GEN6_RPNSWREQ); return freq; } @@ -2176,7 +2177,7 @@ static u32 intel_rps_get_req(u32 pureq) u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) { - u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps)); + u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps, true)); return intel_gpu_freq(rps, freq); } @@ -2189,6 +2190,21 @@ u32 intel_rps_get_requested_frequency(struct intel_rps *rps) return intel_gpu_freq(rps, rps->cur_freq); } +static u32 intel_rps_read_punit_req_frequency_fw(struct intel_rps *rps) +{ + u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps, false)); + + return intel_gpu_freq(rps, freq); +} + +u32 intel_rps_get_requested_frequency_fw(struct intel_rps *rps) +{ + if (rps_uses_slpc(rps)) + return intel_rps_read_punit_req_frequency_fw(rps); + else + return intel_gpu_freq(rps, rps->cur_freq); +} + u32 intel_rps_get_max_frequency(struct intel_rps *rps) { struct intel_guc_slpc *slpc = rps_to_slpc(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 63511b826a97..a990f985ab23 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -41,6 +41,7 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_requested_frequency(struct intel_rps *rps); +u32 intel_rps_get_requested_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_min_frequency(struct intel_rps *rps); u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val); @@ -50,7 +51,6 @@ int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); -u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_rpstat(struct intel_rps *rps); u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); -- 2.38.0
[PATCH 3/3] drm/i915/pmu: Use common freq functions with sysfs
Using common freq functions with sysfs in PMU (but without taking forcewake) solves the following issues (a) missing support for MTL (b) missing support for older generation (prior to Gen6) (c) missing support for slpc when freq sampling has to fall back to requested freq. It also makes the PMU code future proof where sometimes code has been updated for sysfs and PMU has been missed. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_rps.c | 10 -- drivers/gpu/drm/i915/gt/intel_rps.h | 1 - drivers/gpu/drm/i915/i915_pmu.c | 10 -- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 49df31927c0e..b03bfbe7ee23 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) -{ - struct drm_i915_private *i915 = rps_to_i915(rps); - i915_reg_t rpstat; - - rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; - - return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); -} - u32 intel_rps_read_rpstat(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index a990f985ab23..60ae27679011 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -53,7 +53,6 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); u32 intel_rps_read_rpstat(struct intel_rps *rps); -u32 intel_rps_read_rpstat_fw(struct intel_rps *rps); void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps); void intel_rps_raise_unslice(struct intel_rps *rps); void intel_rps_lower_unslice(struct intel_rps *rps); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index a76c5ce9513d..1a4c9fed257c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -392,14 +392,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * case we assume the system is running at the intended * frequency. Fortunately, the read should rarely fail! */ - val = intel_rps_read_rpstat_fw(rps); - if (val) - val = intel_rps_get_cagf(rps, val); - else - val = rps->cur_freq; + val = intel_rps_read_actual_frequency_fw(rps); + if (!val) + val = intel_rps_get_requested_frequency_fw(rps), add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(rps, val), period_ns / 1000); + val, period_ns / 1000); } if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { -- 2.38.0
[PATCH 1/3] drm/i915/rps: Expose read_actual_frequency_fw for PMU
Expose intel_rps_read_actual_frequency_fw to read the actual/granted freq without taking forcewake. This is done for use by PMU which does not take forcewake when reading freq. The code is refactored to use a common set of functions across sysfs and PMU. It also allows PMU to support MTL as well as older generations (before Gen6) which were previously missed due to the non-use of common functions across sysfs and PMU. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_rps.c | 36 + drivers/gpu/drm/i915/gt/intel_rps.h | 1 + 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 4d0dc9de23f9..0a8e24bcb874 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2089,10 +2089,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) return cagf; } -static u32 read_cagf(struct intel_rps *rps) +static u32 __read_cagf(struct intel_rps *rps, bool take_fw) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + i915_reg_t r = INVALID_MMIO_REG; u32 freq; /* @@ -2100,22 +2101,30 @@ static u32 read_cagf(struct intel_rps *rps) * registers will return 0 freq when GT is in RC6 */ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { - freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + r = MTL_MIRROR_TARGET_WP1; } else if (GRAPHICS_VER(i915) >= 12) { - freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + r = GEN12_RPSTAT1; } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); + goto exit; } else if (GRAPHICS_VER(i915) >= 6) { - freq = intel_uncore_read(uncore, GEN6_RPSTAT1); + r = GEN6_RPSTAT1; } else { - freq = intel_uncore_read(uncore, MEMSTAT_ILK); + r = MEMSTAT_ILK; } + freq = take_fw ? intel_uncore_read(uncore, r) : intel_uncore_read_fw(uncore, r); +exit: return intel_rps_get_cagf(rps, freq); } +static u32 read_cagf(struct intel_rps *rps) +{ + return __read_cagf(rps, true); +} + u32 intel_rps_read_actual_frequency(struct intel_rps *rps) { struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; @@ -2128,6 +2137,23 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } +static u32 read_cagf_fw(struct intel_rps *rps) +{ + return __read_cagf(rps, false); +} + +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps) +{ + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; + + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_gpu_freq(rps, read_cagf_fw(rps)); + + return freq; +} + u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index c622962c6bef..63511b826a97 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -39,6 +39,7 @@ int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps); u32 intel_rps_get_requested_frequency(struct intel_rps *rps); u32 intel_rps_get_min_frequency(struct intel_rps *rps); u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); -- 2.38.0
[PATCH 2/2] drm/i915/pmu: Use correct requested freq for SLPC
SLPC does not use 'struct intel_rps'. Use UNSLICE_RATIO bits from GEN6_RPNSWREQ for SLPC. See intel_rps_get_requested_frequency. Bspec: 52745 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_pmu.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index f0a1e36915b8..5ee836610801 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -394,8 +394,13 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * frequency. Fortunately, the read should rarely fail! */ val = intel_rps_get_cagf(rps, intel_rps_read_rpstat_fw(rps)); - if (!val) - val = rps->cur_freq; + if (!val) { + if (intel_uc_uses_guc_slpc(>uc)) + val = intel_rps_read_punit_req(rps) >> + GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; + else + val = rps->cur_freq; + } add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT], intel_gpu_freq(rps, val), period_ns / 1000); -- 2.38.0
[PATCH 0/2] drm/i915/pmu: Freq sampling: Fix requested freq fallback
A couple of minor fixes to the PMU requested freq fallback for PMU freq sampling. Ashutosh Dixit (2): drm/i915/pmu: Use only freq bits for falling back to requested freq drm/i915/pmu: Use correct requested freq for SLPC drivers/gpu/drm/i915/i915_pmu.c | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) -- 2.38.0
[PATCH 1/2] drm/i915/pmu: Use only freq bits for falling back to requested freq
On newer generations, the GEN12_RPSTAT1 register contains more than freq information, e.g. see GEN12_VOLTAGE_MASK. Therefore use only the freq bits to decide whether to fall back to requested freq. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_pmu.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 52531ab28c5f..f0a1e36915b8 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -393,10 +393,8 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) * case we assume the system is running at the intended * frequency. Fortunately, the read should rarely fail! */ - val = intel_rps_read_rpstat_fw(rps); - if (val) - val = intel_rps_get_cagf(rps, val); - else + val = intel_rps_get_cagf(rps, intel_rps_read_rpstat_fw(rps)); + if (!val) val = rps->cur_freq; add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT], -- 2.38.0
[PATCH] drm/i915/hwmon: Accept writes of value 0 to power1_max_interval
The value shown by power1_max_interval in millisec is essentially: ((1.x * power(2,y)) * 1000) >> 10 Where x and y are read from a HW register. On ATSM, x and y are 0 on power-up so the value shown is 0. Writes of 0 to power1_max_interval had previously been disallowed to avoid computing ilog2(0) but this resulted in the corner-case bug below. Therefore allow writes of 0 now but special case that write to x = y = 0. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7754 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 7c20a6f47b92e..596dd2c070106 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -218,11 +218,15 @@ hwm_power1_max_interval_store(struct device *dev, /* val in hw units */ val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); /* Convert to 1.x * power(2,y) */ - if (!val) - return -EINVAL; - y = ilog2(val); - /* x = (val - (1 << y)) >> (y - 2); */ - x = (val - (1ul << y)) << x_w >> y; + if (!val) { + /* Avoid ilog2(0) */ + y = 0; + x = 0; + } else { + y = ilog2(val); + /* x = (val - (1 << y)) >> (y - 2); */ + x = (val - (1ul << y)) << x_w >> y; + } rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); -- 2.38.0
[PATCH 2/2] drm/i915/hwmon: Enable PL1 limit when writing limit value to HW
Previous documentation suggested that the PL1 power limit is always enabled in HW. However we now find this not to be the case on some platforms (such as ATSM). Therefore enable the PL1 power limit (by setting the enable bit) when writing the PL1 limit value to HW. Bspec: 51864 Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_hwmon.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 85195d61f89c7..7c20a6f47b92e 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -385,10 +385,11 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) /* Computation in 64-bits to avoid overflow. Round to nearest. */ nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); + nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1, - REG_FIELD_PREP(PKG_PWR_LIM_1, nval)); + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, + nval); return 0; } -- 2.38.0
[PATCH 1/2] drm/i915/hwmon: Replace hwm_field_scale_and_write with hwm_power_max_write
hwm_field_scale_and_write has a single caller hwm_power_write and is specific to hwm_power_write but makes it appear that it is a general function which can have multiple callers. Replace the function with hwm_power_max_write which is specific to hwm_power_write and use that in future patches where the function needs to be extended. Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_hwmon.c | 36 ++- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 1225bc432f0d5..85195d61f89c7 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -99,20 +99,6 @@ hwm_field_read_and_scale(struct hwm_drvdata *ddat, i915_reg_t rgadr, return mul_u64_u32_shr(reg_value, scale_factor, nshift); } -static void -hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr, - int nshift, unsigned int scale_factor, long lval) -{ - u32 nval; - - /* Computation in 64-bits to avoid overflow. Round to nearest. */ - nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor); - - hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr, - PKG_PWR_LIM_1, - REG_FIELD_PREP(PKG_PWR_LIM_1, nval)); -} - /* * hwm_energy - Obtain energy value * @@ -391,6 +377,21 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val) return 0; } +static int +hwm_power_max_write(struct hwm_drvdata *ddat, long val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + u32 nval; + + /* Computation in 64-bits to avoid overflow. Round to nearest. */ + nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); + + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1, + REG_FIELD_PREP(PKG_PWR_LIM_1, nval)); + return 0; +} + static int hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) { @@ -425,16 +426,11 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) static int hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) { - struct i915_hwmon *hwmon = ddat->hwmon; u32 uval; switch (attr) { case hwmon_power_max: - hwm_field_scale_and_write(ddat, - hwmon->rg.pkg_rapl_limit, - hwmon->scl_shift_power, - SF_POWER, val); - return 0; + return hwm_power_max_write(ddat, val); case hwmon_power_crit: uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER); return hwm_pcode_write_i1(ddat->uncore->i915, uval); -- 2.38.0
[PATCH 0/2] PL1 power limit fixes for ATSM
Previous PL1 power limit implementation assumed that the PL1 limit is always enabled in HW. However we now find this not to be the case on ATSM where the PL1 limit is disabled at power up. This requires changes in the previous PL1 limit implementation. v2: Dropping Patch 3 (since it is NAK'd by hwmon) so that the first two patches can get merged. The first two patches are sufficient to fix the main ATSM issue. Ashutosh Dixit (2): drm/i915/hwmon: Replace hwm_field_scale_and_write with hwm_power_max_write drm/i915/hwmon: Enable PL1 limit when writing limit value to HW drivers/gpu/drm/i915/i915_hwmon.c | 37 ++- 1 file changed, 17 insertions(+), 20 deletions(-) -- 2.38.0
[PATCH 1/3] drm/i915/hwmon: Replace hwm_field_scale_and_write with hwm_power_max_write
hwm_field_scale_and_write has a single caller hwm_power_write and is specific to hwm_power_write but makes it appear that it is a general function which can have multiple callers. Replace the function with hwm_power_max_write which is specific to hwm_power_write and use that in future patches where the function needs to be extended. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 36 ++- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 1225bc432f0d5..85195d61f89c7 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -99,20 +99,6 @@ hwm_field_read_and_scale(struct hwm_drvdata *ddat, i915_reg_t rgadr, return mul_u64_u32_shr(reg_value, scale_factor, nshift); } -static void -hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr, - int nshift, unsigned int scale_factor, long lval) -{ - u32 nval; - - /* Computation in 64-bits to avoid overflow. Round to nearest. */ - nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor); - - hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr, - PKG_PWR_LIM_1, - REG_FIELD_PREP(PKG_PWR_LIM_1, nval)); -} - /* * hwm_energy - Obtain energy value * @@ -391,6 +377,21 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val) return 0; } +static int +hwm_power_max_write(struct hwm_drvdata *ddat, long val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + u32 nval; + + /* Computation in 64-bits to avoid overflow. Round to nearest. */ + nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); + + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1, + REG_FIELD_PREP(PKG_PWR_LIM_1, nval)); + return 0; +} + static int hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) { @@ -425,16 +426,11 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) static int hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val) { - struct i915_hwmon *hwmon = ddat->hwmon; u32 uval; switch (attr) { case hwmon_power_max: - hwm_field_scale_and_write(ddat, - hwmon->rg.pkg_rapl_limit, - hwmon->scl_shift_power, - SF_POWER, val); - return 0; + return hwm_power_max_write(ddat, val); case hwmon_power_crit: uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER); return hwm_pcode_write_i1(ddat->uncore->i915, uval); -- 2.38.0
[PATCH 2/3] drm/i915/hwmon: Enable PL1 limit when writing limit value to HW
Previous documentation suggested that the PL1 power limit is always enabled in HW. However we now find this not to be the case on some platforms (such as ATSM). Therefore enable the PL1 power limit (by setting the enable bit) when writing the PL1 limit value to HW. Bspec: 51864 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 85195d61f89c7..7c20a6f47b92e 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -385,10 +385,11 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val) /* Computation in 64-bits to avoid overflow. Round to nearest. */ nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, SF_POWER); + nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval); hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1, - REG_FIELD_PREP(PKG_PWR_LIM_1, nval)); + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, + nval); return 0; } -- 2.38.0
[PATCH 3/3] drm/i915/hwmon: Expose power1_max_enable
On ATSM the PL1 power limit is disabled at power up. The previous uapi assumed that the PL1 limit is always enabled and therefore did not have a notion of a disabled PL1 limit. This results in erroneous PL1 limit values when PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit is shown as 0 which means a low PL1 limit whereas the limit being disabled actually implies a high effective PL1 limit value. To get round this problem, expose power1_max_enable as a custom hwmon attribute. power1_max_enable can be used in conjunction with power1_max to interpret power1_max (PL1 limit) values correctly. It can also be used to enable/disable the PL1 power limit. Signed-off-by: Ashutosh Dixit --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 7 +++ drivers/gpu/drm/i915/i915_hwmon.c | 48 +-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon index 2d6a472eef885..edd94a44b4570 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -18,6 +18,13 @@ Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts. Only supported for particular Intel i915 graphics platforms. +What: /sys/devices/.../hwmon/hwmon/power1_max_enable +Date: May 2023 +KernelVersion: 6.3 +Contact: intel-...@lists.freedesktop.org +Description: RW. Enable/disable the PL1 power limit (power1_max). + + Only supported for particular Intel i915 graphics platforms. What: /sys/devices/.../hwmon/hwmon/power1_rated_max Date: February 2023 KernelVersion: 6.2 diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 7c20a6f47b92e..5665869d8602b 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -230,13 +230,52 @@ hwm_power1_max_interval_store(struct device *dev, PKG_PWR_LIM_1_TIME, rxy); return count; } +static SENSOR_DEVICE_ATTR_RW(power1_max_interval, hwm_power1_max_interval, 0); -static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, - hwm_power1_max_interval_show, - hwm_power1_max_interval_store, 0); +static ssize_t +hwm_power1_max_enable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + intel_wakeref_t wakeref; + u32 r; + + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit); + + return sysfs_emit(buf, "%u\n", !!(r & PKG_PWR_LIM_1_EN)); +} + +static ssize_t +hwm_power1_max_enable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + intel_wakeref_t wakeref; + u32 en, r; + bool _en; + int ret; + + ret = kstrtobool(buf, &_en); + if (ret) + return ret; + + en = REG_FIELD_PREP(PKG_PWR_LIM_1_EN, _en); + hwm_locked_with_pm_intel_uncore_rmw(ddat, ddat->hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, en); + + /* Verify, because PL1 limit cannot be disabled on all platforms */ + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read(ddat->uncore, ddat->hwmon->rg.pkg_rapl_limit); + if ((r & PKG_PWR_LIM_1_EN) != en) + return -EPERM; + + return count; +} +static SENSOR_DEVICE_ATTR_RW(power1_max_enable, hwm_power1_max_enable, 0); static struct attribute *hwm_attributes[] = { _dev_attr_power1_max_interval.dev_attr.attr, + _dev_attr_power1_max_enable.dev_attr.attr, NULL }; @@ -247,7 +286,8 @@ static umode_t hwm_attributes_visible(struct kobject *kobj, struct hwm_drvdata *ddat = dev_get_drvdata(dev); struct i915_hwmon *hwmon = ddat->hwmon; - if (attr == _dev_attr_power1_max_interval.dev_attr.attr) + if (attr == _dev_attr_power1_max_interval.dev_attr.attr || + attr == _dev_attr_power1_max_enable.dev_attr.attr) return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? attr->mode : 0; return 0; -- 2.38.0
[PATCH 0/3] PL1 power limit fixes for ATSM
Previous PL1 power limit implementation assumed that the PL1 limit is always enabled in HW. However we now find this not to be the case on ATSM where the PL1 limit is disabled at power up. This requires changes in the previous PL1 limit implementation. Submitting 3 patches for easier review but patches can be squashed if needed. Ashutosh Dixit (3): drm/i915/hwmon: Replace hwm_field_scale_and_write with hwm_power_max_write drm/i915/hwmon: Enable PL1 limit when writing limit value to HW drm/i915/hwmon: Expose power1_max_enable .../ABI/testing/sysfs-driver-intel-i915-hwmon | 7 ++ drivers/gpu/drm/i915/i915_hwmon.c | 85 +-- 2 files changed, 68 insertions(+), 24 deletions(-) -- 2.38.0
[PATCH] Revert "drm/i915/hwmon: Enable PL1 power limit"
This reverts commit 0349c41b05968befaffa5fbb7e73d0ee6004f610. 0349c41b0596 ("drm/i915/hwmon: Enable PL1 power limit") is incorrect and caused a major regression on ATSM. The change enabled the PL1 power limit but FW sets the default value of the PL1 limit to 0 which implies HW now works at minimum power and therefore the lowest effective frequency. This means all workloads now run slower resulting in even GuC FW load operations timing out, rendering ATSM unusable. A different solution to the original issue of the PL1 limit being disabled on ATSM is needed but till that is developed, revert 0349c41b0596. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 5 - 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 4683a5b96eff1..1225bc432f0d5 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -687,11 +687,6 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(>ddat_gt[i], ); } - - /* Enable PL1 power limit */ - if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) -- 2.38.0
[PATCH] drm/i915/hwmon: Enable PL1 power limit
Previous documentation suggested that PL1 power limit is always enabled. However we now find this not to be the case on some platforms (such as ATSM). Therefore enable PL1 power limit during hwmon initialization. Bspec: 51864 v2: Add Bspec reference (Gwan-gyeong) v3: Add Fixes tag Fixes: 99f55efb79114 ("drm/i915/hwmon: Power PL1 limit and TDP setting") Signed-off-by: Ashutosh Dixit Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/i915_hwmon.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 1225bc432f0d5..4683a5b96eff1 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(>ddat_gt[i], ); } + + /* Enable PL1 power limit */ + if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) -- 2.38.0
[PATCH] drm/i915/hwmon: Enable PL1 power limit
Previous documentation suggested that PL1 power limit is always enabled. However we now find this not to be the case on some platforms (such as ATSM). Therefore enable PL1 power limit during hwmon initialization. Bspec: 51864 v2: Add Bspec reference (Gwan-gyeong) Signed-off-by: Ashutosh Dixit Reviewed-by: Gwan-gyeong Mun --- drivers/gpu/drm/i915/i915_hwmon.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 1225bc432f0d5..4683a5b96eff1 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(>ddat_gt[i], ); } + + /* Enable PL1 power limit */ + if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) -- 2.38.0
[PATCH] drm/i915/hwmon: Enable PL1 power limit
Previous documentation suggested that PL1 power limit is always enabled. However we now find this not to be the case on some platforms (such as ATSM). Therefore enable PL1 power limit during hwmon initialization. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 1225bc432f0d5..4683a5b96eff1 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(>ddat_gt[i], ); } + + /* Enable PL1 power limit */ + if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) -- 2.38.0
[PATCH] drm/i915/hwmon: Display clamped PL1 limit
HW allows arbitrary PL1 limits to be set but silently clamps these values to "typical but not guaranteed" min/max values in pkg_power_sku register. Follow the same pattern for sysfs, allow arbitrary PL1 limits to be set but display clamped values when read, so that users see PL1 limits HW is likely using. Otherwise users think HW is using arbitrarily high/low PL1 limits they might have set. The previous write/read I1 power1_crit limit also follows the same clamping pattern. v2: Explain "why" in commit message and include bug link (Jani Nikula) Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7704 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c| 39 drivers/gpu/drm/i915/intel_mchbar_regs.h | 2 ++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index cca7a4350ec8f..1225bc432f0d5 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -359,6 +359,38 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan) } } +/* + * HW allows arbitrary PL1 limits to be set but silently clamps these values to + * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the + * same pattern for sysfs, allow arbitrary PL1 limits to be set but display + * clamped values when read. Write/read I1 also follows the same pattern. + */ +static int +hwm_power_max_read(struct hwm_drvdata *ddat, long *val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; + u64 r, min, max; + + *val = hwm_field_read_and_scale(ddat, + hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1, + hwmon->scl_shift_power, + SF_POWER); + + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read64(ddat->uncore, hwmon->rg.pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, r); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = REG_FIELD_GET(PKG_MAX_PWR, r); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + + if (min && max) + *val = clamp_t(u64, *val, min, max); + + return 0; +} + static int hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) { @@ -368,12 +400,7 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) switch (attr) { case hwmon_power_max: - *val = hwm_field_read_and_scale(ddat, - hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1, - hwmon->scl_shift_power, - SF_POWER); - return 0; + return hwm_power_max_read(ddat, val); case hwmon_power_rated_max: *val = hwm_field_read_and_scale(ddat, hwmon->rg.pkg_power_sku, diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h b/drivers/gpu/drm/i915/intel_mchbar_regs.h index f93e9af43ac35..73900c098d591 100644 --- a/drivers/gpu/drm/i915/intel_mchbar_regs.h +++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h @@ -194,6 +194,8 @@ */ #define PCU_PACKAGE_POWER_SKU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5930) #define PKG_PKG_TDP GENMASK_ULL(14, 0) +#define PKG_MIN_PWR GENMASK_ULL(30, 16) +#define PKG_MAX_PWR GENMASK_ULL(46, 32) #define PKG_MAX_WIN GENMASK_ULL(54, 48) #define PKG_MAX_WIN_X GENMASK_ULL(54, 53) #define PKG_MAX_WIN_Y GENMASK_ULL(52, 48) -- 2.38.0
[PATCH] drm/i915/hwmon: Display clamped PL1 limit
HW allows arbitrary PL1 limits to be set but silently clamps these values to "typical but not guaranteed" min/max values in pkg_power_sku register. Follow the same pattern for sysfs, allow arbitrary PL1 limits to be set but display clamped values when read. Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c| 39 drivers/gpu/drm/i915/intel_mchbar_regs.h | 2 ++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index cca7a4350ec8f..1225bc432f0d5 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -359,6 +359,38 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan) } } +/* + * HW allows arbitrary PL1 limits to be set but silently clamps these values to + * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the + * same pattern for sysfs, allow arbitrary PL1 limits to be set but display + * clamped values when read. Write/read I1 also follows the same pattern. + */ +static int +hwm_power_max_read(struct hwm_drvdata *ddat, long *val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; + u64 r, min, max; + + *val = hwm_field_read_and_scale(ddat, + hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1, + hwmon->scl_shift_power, + SF_POWER); + + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read64(ddat->uncore, hwmon->rg.pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, r); + min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); + max = REG_FIELD_GET(PKG_MAX_PWR, r); + max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); + + if (min && max) + *val = clamp_t(u64, *val, min, max); + + return 0; +} + static int hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) { @@ -368,12 +400,7 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val) switch (attr) { case hwmon_power_max: - *val = hwm_field_read_and_scale(ddat, - hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1, - hwmon->scl_shift_power, - SF_POWER); - return 0; + return hwm_power_max_read(ddat, val); case hwmon_power_rated_max: *val = hwm_field_read_and_scale(ddat, hwmon->rg.pkg_power_sku, diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h b/drivers/gpu/drm/i915/intel_mchbar_regs.h index f93e9af43ac35..73900c098d591 100644 --- a/drivers/gpu/drm/i915/intel_mchbar_regs.h +++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h @@ -194,6 +194,8 @@ */ #define PCU_PACKAGE_POWER_SKU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5930) #define PKG_PKG_TDP GENMASK_ULL(14, 0) +#define PKG_MIN_PWR GENMASK_ULL(30, 16) +#define PKG_MAX_PWR GENMASK_ULL(46, 32) #define PKG_MAX_WIN GENMASK_ULL(54, 48) #define PKG_MAX_WIN_X GENMASK_ULL(54, 53) #define PKG_MAX_WIN_Y GENMASK_ULL(52, 48) -- 2.38.0
[PATCH] drm/i915/hwmon: Don't use FIELD_PREP
FIELD_PREP and REG_FIELD_PREP have checks requiring a compile time constant mask. When the mask comes in as the argument of a function these checks can can fail depending on the compiler (gcc vs clang), optimization level, etc. Use a simpler version of FIELD_PREP which skips these checks. The checks are not needed because the mask is formed using REG_GENMASK (so is actually a compile time constant). v2: Split REG_FIELD_PREP into a macro with checks and one without and use the one without checks in i915_hwmon.c (Gwan-gyeong Mun) Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7354 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c| 2 +- drivers/gpu/drm/i915/i915_reg_defs.h | 17 +++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 9e97814930254..ae435b035229a 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -112,7 +112,7 @@ hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr, nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor); bits_to_clear = field_msk; - bits_to_set = FIELD_PREP(field_msk, nval); + bits_to_set = __REG_FIELD_PREP(field_msk, nval); hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr, bits_to_clear, bits_to_set); diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h b/drivers/gpu/drm/i915/i915_reg_defs.h index f1859046a9c48..dddacc8d48928 100644 --- a/drivers/gpu/drm/i915/i915_reg_defs.h +++ b/drivers/gpu/drm/i915/i915_reg_defs.h @@ -67,12 +67,17 @@ * * @return: @__val masked and shifted into the field defined by @__mask. */ -#define REG_FIELD_PREP(__mask, __val) \ - ((u32)typeof(__mask))(__val) << __bf_shf(__mask)) & (__mask)) + \ - BUILD_BUG_ON_ZERO(!__is_constexpr(__mask)) + \ - BUILD_BUG_ON_ZERO((__mask) == 0 || (__mask) > U32_MAX) + \ - BUILD_BUG_ON_ZERO(!IS_POWER_OF_2((__mask) + (1ULL << __bf_shf(__mask + \ - BUILD_BUG_ON_ZERO(__builtin_choose_expr(__is_constexpr(__val), (~((__mask) >> __bf_shf(__mask)) & (__val)), 0 +#define __REG_FIELD_PREP_CHK(__mask, __val) \ + (BUILD_BUG_ON_ZERO(!__is_constexpr(__mask)) + \ +BUILD_BUG_ON_ZERO((__mask) == 0 || (__mask) > U32_MAX) + \ +BUILD_BUG_ON_ZERO(!IS_POWER_OF_2((__mask) + (1ULL << __bf_shf(__mask + \ +BUILD_BUG_ON_ZERO(__builtin_choose_expr(__is_constexpr(__val), (~((__mask) >> __bf_shf(__mask)) & (__val)), 0))) + +#define __REG_FIELD_PREP(__mask, __val) \ + ((u32)typeof(__mask))(__val) << __bf_shf(__mask)) & (__mask + +#define REG_FIELD_PREP(__mask, __val) \ + (__REG_FIELD_PREP(__mask, __val) + __REG_FIELD_PREP_CHK(__mask, __val)) /** * REG_FIELD_GET() - Extract a u32 bitfield value -- 2.38.0
[PATCH] drm/i915/hwmon: Don't use FIELD_PREP
FIELD_PREP and REG_FIELD_PREP have checks requiring a compile time constant mask. When the mask comes in as the argument of a function these checks can can fail depending on the compiler (gcc vs clang), optimization level, etc. Use a simpler local version of FIELD_PREP which skips these checks. The checks are not needed because the mask is formed using REG_GENMASK (so is actually a compile time constant). Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7354 Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/i915_hwmon.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 9e97814930254..a3ec9a73a4e49 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -62,6 +62,12 @@ struct i915_hwmon { int scl_shift_time; }; +/* FIELD_PREP and REG_FIELD_PREP require a compile time constant mask */ +static u32 hwm_field_prep(u32 mask, u32 val) +{ + return (val << __bf_shf(mask)) & mask; +} + static void hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat, i915_reg_t reg, u32 clear, u32 set) @@ -112,7 +118,7 @@ hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr, nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor); bits_to_clear = field_msk; - bits_to_set = FIELD_PREP(field_msk, nval); + bits_to_set = hwm_field_prep(field_msk, nval); hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr, bits_to_clear, bits_to_set); -- 2.38.0
[PATCH 2/5] drm/i915: Use GEN12_RPSTAT register for GT freq
From: Don Hiatt On GEN12+ use GEN12_RPSTAT register to get actual resolved GT freq. GEN12_RPSTAT does not require a forcewake and will return 0 freq if GT is in RC6. v2: - Fixed review comments(Ashutosh) - Added function intel_rps_read_rpstat_fw to read RPSTAT without forcewake, required especially for GEN6_RPSTAT1 (Ashutosh, Tvrtko) v3: - Updated commit title and message for more clarity (Ashutosh) - Replaced intel_rps_read_rpstat with direct read to GEN12_RPSTAT1 in read_cagf (Ashutosh) v4: Remove GEN12_CAGF_SHIFT and use REG_FIELD_GET (Rodrigo) Cc: Don Hiatt Cc: Andi Shyti Signed-off-by: Don Hiatt Signed-off-by: Badal Nilawar Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 32 + drivers/gpu/drm/i915/gt/intel_rps.h | 2 ++ drivers/gpu/drm/i915/i915_pmu.c | 3 +-- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 35c039573294c..f8c4f758ac0b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1539,6 +1539,7 @@ #define GEN12_RPSTAT1 _MMIO(0x1381b4) #define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) +#define GEN12_CAGF_MASK REG_GENMASK(19, 11) #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 5061b5b3cece7..02f69cbae5162 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2066,12 +2066,34 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } +u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); +} + +u32 intel_rps_read_rpstat(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); +} + u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER(i915) >= 12) + cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = REG_FIELD_GET(RPE_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 9) cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); @@ -2091,7 +2113,9 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + if (GRAPHICS_VER(i915) >= 12) { + freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); @@ -2257,7 +2281,7 @@ static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpstat = intel_rps_read_rpstat(rps); rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; @@ -2392,7 +2416,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) drm_printf(p, "PM MASK=0x%08x\n", pm_mask); drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", rps->pm_intrmsk_mbz); - drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1)); + drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps)); drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, caps.min_freq)); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 110300dfd4383..9e1cad9ba0e9c 100644 --- a/dri
[PATCH 3/5] drm/i915/mtl: Modify CAGF functions for MTL
From: Badal Nilawar Update CAGF functions for MTL to get actual resolved frequency of 3D and SAMedia. v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR) Move MTL branches in cagf functions to top (MattR) Fix commit message (Andi) v3: Added comment about registers not needing forcewake for Gen12+ and returning 0 freq in RC6 v4: Use REG_FIELD_GET and uncore (Rodrigo) Bspec: 66300 Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar Reviewed-by: Ashutosh Dixit Acked-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index f8c4f758ac0b1..d8dbd0ac3b064 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -21,6 +21,10 @@ */ #define PERF_REG(offset) _MMIO(offset) +/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ +#define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) +#define MTL_CAGF_MASKREG_GENMASK(8, 0) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3 diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 02f69cbae5162..eb3343a217947 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2091,7 +2091,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat); + else if (GRAPHICS_VER(i915) >= 12) cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = REG_FIELD_GET(RPE_MASK, rpstat); @@ -2113,7 +2115,13 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (GRAPHICS_VER(i915) >= 12) { + /* +* For Gen12+ reading freq from HW does not need a forcewake and +* registers will return 0 freq when GT is in RC6 +*/ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + } else if (GRAPHICS_VER(i915) >= 12) { freq = intel_uncore_read(uncore, GEN12_RPSTAT1); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); -- 2.38.0
[PATCH 4/5] drm/i915/gt: Use RC6 residency types as arguments to residency functions
Previously RC6 residency functions directly accepted RC6 residency register MMIO offsets (there are four RC6 residency registers). This worked but required an assumption on the residency register layout so was not future proof. Therefore change RC6 residency functions to accept RC6 residency types instead of register MMIO offsets. The knowledge of register offsets as well as ID to offset mapping is now maintained solely in intel_rc6 and can be tailored for different platforms and different register layouts as need arises. v2: Address review comments by Jani N - Change residency functions to accept RC6 residency types instead of register ID's - s/intel_rc6_print_rc5_res/intel_rc6_print_residency/ - Remove "const enum" in function arguments - Naming: intel_rc6_* for enum - Use INTEL_RC6_RES_MAX and other minor changes v3: Don't include intel_rc6_types.h in intel_rc6.h (Jani) Suggested-by: Rodrigo Vivi Suggested-by: Jani Nikula Reported-by: Jani Nikula Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 ++-- drivers/gpu/drm/i915/gt/intel_rc6.c | 55 +++ drivers/gpu/drm/i915/gt/intel_rc6.h | 11 ++-- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 - drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 6 +- 7 files changed, 72 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 979e602946549..5d6b346831393 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data) } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); -static void print_rc6_res(struct seq_file *m, - const char *title, - const i915_reg_t reg) -{ - struct intel_gt *gt = m->private; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - seq_printf(m, "%s %u (%llu us)\n", title, - intel_uncore_read(gt->uncore, reg), - intel_rc6_residency_us(>rc6, reg)); -} - static int vlv_drpc(struct seq_file *m) { struct intel_gt *gt = m->private; @@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m) seq_printf(m, "Media Power Well: %s\n", (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); - print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + intel_rc6_print_residency(m, "Render RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "Media RC6 residency since boot:", INTEL_RC6_RES_VLV_MEDIA); return fw_domains_show(m, NULL); } @@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m) } /* Not exactly sure what this is */ - print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", - GEN6_GT_GFX_RC6_LOCKED); - print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); - print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since boot:", + INTEL_RC6_RES_RC6_LOCKED); + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "RC6+ residency since boot:", INTEL_RC6_RES_RC6p); + intel_rc6_print_residency(m, "RC6++ residency since boot:", INTEL_RC6_RES_RC6pp); if (GRAPHICS_VER(i915) <= 7) { seq_printf(m, "RC6 voltage: %dmV\n", diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 9041609523697..19a6e052c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) #ifdef CONFIG_PM -static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) +static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id) { intel_wakeref_t wakeref; u64 res = 0; with_intel_runtime_pm(gt->uncore->rpm, wakeref) - res = intel_rc6_residency_us(>rc
[PATCH 5/5] drm/i915/mtl: C6 residency and C state type for MTL SAMedia
From: Badal Nilawar Add support for C6 residency and C state type for MTL SAMedia. Also add mtl_drpc. v2: Fixed review comments (Ashutosh) v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R) Remove MTL_CC_SHIFT (Ashutosh) Adapt to RC6 residency register code refactor (Jani N) v4: Move MTL branch to top in drpc_show v5: Use FORCEWAKE_MT identical to gen6_drpc (Ashutosh) v6: Add MISSING_CASE for gt_core_status switch statement (Rodrigo) Change state name for MTL_CC0 to C0 (from "on") (Rodrigo) v7: Change state name for MTL_CC0 to RC0 (Rodrigo) Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 59 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 ++ drivers/gpu/drm/i915/gt/intel_rc6.c | 17 -- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 5d6b346831393..83df4cd5e06cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -256,6 +256,61 @@ static int ilk_drpc(struct seq_file *m) return 0; } +static int mtl_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, mt_fwake_req; + u32 mtl_powergate_enable = 0, mtl_powergate_status = 0; + + mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT); + gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + mtl_powergate_status = intel_uncore_read(uncore, +GEN9_PWRGT_DOMAIN_STATUS); + + seq_printf(m, "RC6 Enabled: %s\n", + str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (gt->type == GT_MEDIA) { + seq_printf(m, "Media Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } else { + seq_printf(m, "Render Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_RENDER_PG_ENABLE)); + } + + seq_puts(m, "Current RC state: "); + switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) { + case MTL_CC0: + seq_puts(m, "RC0\n"); + break; + case MTL_CC6: + seq_puts(m, "RC6\n"); + break; + default: + MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status)); + seq_puts(m, "Unknown\n"); + break; + } + + seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req); + if (gt->type == GT_MEDIA) + seq_printf(m, "Media Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + else + seq_printf(m, "Render Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + + /* Works for both render and media gt's */ + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + + return fw_domains_show(m, NULL); +} + static int drpc_show(struct seq_file *m, void *unused) { struct intel_gt *gt = m->private; @@ -264,7 +319,9 @@ static int drpc_show(struct seq_file *m, void *unused) int err = -ENODEV; with_intel_runtime_pm(gt->uncore->rpm, wakeref) { - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + err = mtl_drpc(m); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_drpc(m); else if (GRAPHICS_VER(i915) >= 6) err = gen6_drpc(m); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index d8dbd0ac3b064..a0ddaf243593c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -24,6 +24,9 @@ /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ #define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) #define MTL_CAGF_MASKREG_GENMASK(8, 0) +#define MTL_CC0 0x0 +#define MTL_CC6 0x3 +#define MTL_CC_MASK
[PATCH 1/5] drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf
Instead of masks/shifts settle on REG_FIELD_GET as the standard way to extract reg fields. This allows future patches touching this code to also consistently use REG_FIELD_GET and friends. Suggested-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 12 drivers/gpu/drm/i915/gt/intel_rps.c | 11 +-- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 40d0a3be42acf..979e602946549 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -307,7 +307,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> MEMSTAT_VID_SHIFT); drm_printf(p, "Current P-state: %d\n", - (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); + REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rgvstat)); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { u32 rpmodectl, freq_sts; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 36d95b79022c0..35c039573294c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -794,12 +794,9 @@ #define GEN6_RP_DOWN_TIMEOUT _MMIO(0xa010) #define GEN6_RP_INTERRUPT_LIMITS _MMIO(0xa014) #define GEN6_RPSTAT1 _MMIO(0xa01c) -#define GEN6_CAGF_SHIFT 8 -#define HSW_CAGF_SHIFT 7 -#define GEN9_CAGF_SHIFT 23 -#define GEN6_CAGF_MASK (0x7f << GEN6_CAGF_SHIFT) -#define HSW_CAGF_MASK(0x7f << HSW_CAGF_SHIFT) -#define GEN9_CAGF_MASK (0x1ff << GEN9_CAGF_SHIFT) +#define GEN6_CAGF_MASK REG_GENMASK(14, 8) +#define HSW_CAGF_MASKREG_GENMASK(13, 7) +#define GEN9_CAGF_MASK REG_GENMASK(31, 23) #define GEN6_RP_CONTROL_MMIO(0xa024) #define GEN6_RP_MEDIA_TURBO (1 << 11) #define GEN6_RP_MEDIA_MODE_MASK (3 << 9) @@ -1370,8 +1367,7 @@ #define MEMSTAT_ILK_MMIO(0x111f8) #define MEMSTAT_VID_MASK 0x7f00 #define MEMSTAT_VID_SHIFT8 -#define MEMSTAT_PSTATE_MASK 0x00f8 -#define MEMSTAT_PSTATE_SHIFT 3 +#define MEMSTAT_PSTATE_MASK REG_GENMASK(7, 3) #define MEMSTAT_MON_ACTV (1 << 2) #define MEMSTAT_SRC_CTL_MASK 0x0003 #define MEMSTAT_SRC_CTL_CORE 0 diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 070005dd0da47..5061b5b3cece7 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2072,16 +2072,15 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) u32 cagf; if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - cagf = (rpstat >> 8) & 0xff; + cagf = REG_FIELD_GET(RPE_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 6) - cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat); else - cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >> - MEMSTAT_PSTATE_SHIFT); + cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat)); return cagf; } -- 2.38.0
[PATCH 0/5] i915: CAGF and RC6 changes for MTL
This series includes the code changes to get CAGF, RC State and C6 Residency of MTL. v3: Included "Use GEN12 RPSTAT register" patch v4: - Rebased - Dropped "Use GEN12 RPSTAT register" patch from this series going to send separate series for it v5: - Included "drm/i915/gt: Change RC6 residency functions to accept register ID's" based on code review feedback v6: - Addressed Jani N's review comments on "drm/i915/gt: Change RC6 residency functions to accept register ID's" - Re-add "drm/i915: Use GEN12_RPSTAT register for GT freq" to this series v7: Rebuild, identical to v6 v8: - Add "drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf" to the series (based on Rodrigo's review) to consistently use REG_FIELD_GET - Minor changes to other patches, please see individual patches for changelogs v9: Rebuild, identical to v8 v10: Address review comments from Rodrigo on Patch 5 v11: Change state name for MTL_CC0 to RC0 in Patch 5 Ashutosh Dixit (2): drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf drm/i915/gt: Use RC6 residency types as arguments to residency functions Badal Nilawar (2): drm/i915/mtl: Modify CAGF functions for MTL drm/i915/mtl: C6 residency and C state type for MTL SAMedia Don Hiatt (1): drm/i915: Use GEN12_RPSTAT register for GT freq drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 88 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 22 +++-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 +-- drivers/gpu/drm/i915/gt/intel_rc6.c | 64 +- drivers/gpu/drm/i915/gt/intel_rc6.h | 11 ++- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 +++- drivers/gpu/drm/i915/gt/intel_rps.c | 51 --- drivers/gpu/drm/i915/gt/intel_rps.h | 2 + drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 9 +- 10 files changed, 198 insertions(+), 82 deletions(-) -- 2.38.0
[PATCH 3/5] drm/i915/mtl: Modify CAGF functions for MTL
From: Badal Nilawar Update CAGF functions for MTL to get actual resolved frequency of 3D and SAMedia. v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR) Move MTL branches in cagf functions to top (MattR) Fix commit message (Andi) v3: Added comment about registers not needing forcewake for Gen12+ and returning 0 freq in RC6 v4: Use REG_FIELD_GET and uncore (Rodrigo) Bspec: 66300 Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar Reviewed-by: Ashutosh Dixit Acked-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index f8c4f758ac0b1..d8dbd0ac3b064 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -21,6 +21,10 @@ */ #define PERF_REG(offset) _MMIO(offset) +/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ +#define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) +#define MTL_CAGF_MASKREG_GENMASK(8, 0) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3 diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 02f69cbae5162..eb3343a217947 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2091,7 +2091,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat); + else if (GRAPHICS_VER(i915) >= 12) cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = REG_FIELD_GET(RPE_MASK, rpstat); @@ -2113,7 +2115,13 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (GRAPHICS_VER(i915) >= 12) { + /* +* For Gen12+ reading freq from HW does not need a forcewake and +* registers will return 0 freq when GT is in RC6 +*/ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + } else if (GRAPHICS_VER(i915) >= 12) { freq = intel_uncore_read(uncore, GEN12_RPSTAT1); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); -- 2.38.0
[PATCH 2/5] drm/i915: Use GEN12_RPSTAT register for GT freq
From: Don Hiatt On GEN12+ use GEN12_RPSTAT register to get actual resolved GT freq. GEN12_RPSTAT does not require a forcewake and will return 0 freq if GT is in RC6. v2: - Fixed review comments(Ashutosh) - Added function intel_rps_read_rpstat_fw to read RPSTAT without forcewake, required especially for GEN6_RPSTAT1 (Ashutosh, Tvrtko) v3: - Updated commit title and message for more clarity (Ashutosh) - Replaced intel_rps_read_rpstat with direct read to GEN12_RPSTAT1 in read_cagf (Ashutosh) v4: Remove GEN12_CAGF_SHIFT and use REG_FIELD_GET (Rodrigo) Cc: Don Hiatt Cc: Andi Shyti Signed-off-by: Don Hiatt Signed-off-by: Badal Nilawar Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 32 + drivers/gpu/drm/i915/gt/intel_rps.h | 2 ++ drivers/gpu/drm/i915/i915_pmu.c | 3 +-- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 35c039573294c..f8c4f758ac0b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1539,6 +1539,7 @@ #define GEN12_RPSTAT1 _MMIO(0x1381b4) #define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) +#define GEN12_CAGF_MASK REG_GENMASK(19, 11) #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 5061b5b3cece7..02f69cbae5162 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2066,12 +2066,34 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } +u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); +} + +u32 intel_rps_read_rpstat(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); +} + u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER(i915) >= 12) + cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = REG_FIELD_GET(RPE_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 9) cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); @@ -2091,7 +2113,9 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + if (GRAPHICS_VER(i915) >= 12) { + freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); @@ -2257,7 +2281,7 @@ static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpstat = intel_rps_read_rpstat(rps); rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; @@ -2392,7 +2416,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) drm_printf(p, "PM MASK=0x%08x\n", pm_mask); drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", rps->pm_intrmsk_mbz); - drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1)); + drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps)); drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, caps.min_freq)); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 110300dfd4383..9e1cad9ba0e9c 100644 --- a/dri
[PATCH 5/5] drm/i915/mtl: C6 residency and C state type for MTL SAMedia
From: Badal Nilawar Add support for C6 residency and C state type for MTL SAMedia. Also add mtl_drpc. v2: Fixed review comments (Ashutosh) v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R) Remove MTL_CC_SHIFT (Ashutosh) Adapt to RC6 residency register code refactor (Jani N) v4: Move MTL branch to top in drpc_show v5: Use FORCEWAKE_MT identical to gen6_drpc (Ashutosh) v6: Add MISSING_CASE for gt_core_status switch statement (Rodrigo) Change state name for MTL_CC0 to C0 (from "on") (Rodrigo) Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 59 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 ++ drivers/gpu/drm/i915/gt/intel_rc6.c | 17 -- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 5d6b346831393..522049f053e8a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -256,6 +256,61 @@ static int ilk_drpc(struct seq_file *m) return 0; } +static int mtl_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, mt_fwake_req; + u32 mtl_powergate_enable = 0, mtl_powergate_status = 0; + + mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT); + gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + mtl_powergate_status = intel_uncore_read(uncore, +GEN9_PWRGT_DOMAIN_STATUS); + + seq_printf(m, "RC6 Enabled: %s\n", + str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (gt->type == GT_MEDIA) { + seq_printf(m, "Media Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } else { + seq_printf(m, "Render Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_RENDER_PG_ENABLE)); + } + + seq_puts(m, "Current RC state: "); + switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) { + case MTL_CC0: + seq_puts(m, "C0\n"); + break; + case MTL_CC6: + seq_puts(m, "RC6\n"); + break; + default: + MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status)); + seq_puts(m, "Unknown\n"); + break; + } + + seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req); + if (gt->type == GT_MEDIA) + seq_printf(m, "Media Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + else + seq_printf(m, "Render Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + + /* Works for both render and media gt's */ + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + + return fw_domains_show(m, NULL); +} + static int drpc_show(struct seq_file *m, void *unused) { struct intel_gt *gt = m->private; @@ -264,7 +319,9 @@ static int drpc_show(struct seq_file *m, void *unused) int err = -ENODEV; with_intel_runtime_pm(gt->uncore->rpm, wakeref) { - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + err = mtl_drpc(m); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_drpc(m); else if (GRAPHICS_VER(i915) >= 6) err = gen6_drpc(m); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index d8dbd0ac3b064..a0ddaf243593c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -24,6 +24,9 @@ /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ #define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) #define MTL_CAGF_MASKREG_GENMASK(8, 0) +#define MTL_CC0 0x0 +#define MTL_CC6 0x3 +#define MTL_CC_MASK REG_GENMASK(12, 9) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0
[PATCH 4/5] drm/i915/gt: Use RC6 residency types as arguments to residency functions
Previously RC6 residency functions directly accepted RC6 residency register MMIO offsets (there are four RC6 residency registers). This worked but required an assumption on the residency register layout so was not future proof. Therefore change RC6 residency functions to accept RC6 residency types instead of register MMIO offsets. The knowledge of register offsets as well as ID to offset mapping is now maintained solely in intel_rc6 and can be tailored for different platforms and different register layouts as need arises. v2: Address review comments by Jani N - Change residency functions to accept RC6 residency types instead of register ID's - s/intel_rc6_print_rc5_res/intel_rc6_print_residency/ - Remove "const enum" in function arguments - Naming: intel_rc6_* for enum - Use INTEL_RC6_RES_MAX and other minor changes v3: Don't include intel_rc6_types.h in intel_rc6.h (Jani) Suggested-by: Rodrigo Vivi Suggested-by: Jani Nikula Reported-by: Jani Nikula Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 ++-- drivers/gpu/drm/i915/gt/intel_rc6.c | 55 +++ drivers/gpu/drm/i915/gt/intel_rc6.h | 11 ++-- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 - drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 6 +- 7 files changed, 72 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 979e602946549..5d6b346831393 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data) } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); -static void print_rc6_res(struct seq_file *m, - const char *title, - const i915_reg_t reg) -{ - struct intel_gt *gt = m->private; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - seq_printf(m, "%s %u (%llu us)\n", title, - intel_uncore_read(gt->uncore, reg), - intel_rc6_residency_us(>rc6, reg)); -} - static int vlv_drpc(struct seq_file *m) { struct intel_gt *gt = m->private; @@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m) seq_printf(m, "Media Power Well: %s\n", (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); - print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + intel_rc6_print_residency(m, "Render RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "Media RC6 residency since boot:", INTEL_RC6_RES_VLV_MEDIA); return fw_domains_show(m, NULL); } @@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m) } /* Not exactly sure what this is */ - print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", - GEN6_GT_GFX_RC6_LOCKED); - print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); - print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since boot:", + INTEL_RC6_RES_RC6_LOCKED); + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "RC6+ residency since boot:", INTEL_RC6_RES_RC6p); + intel_rc6_print_residency(m, "RC6++ residency since boot:", INTEL_RC6_RES_RC6pp); if (GRAPHICS_VER(i915) <= 7) { seq_printf(m, "RC6 voltage: %dmV\n", diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 9041609523697..19a6e052c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) #ifdef CONFIG_PM -static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) +static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id) { intel_wakeref_t wakeref; u64 res = 0; with_intel_runtime_pm(gt->uncore->rpm, wakeref) - res = intel_rc6_residency_us(>rc
[PATCH 0/5] i915: CAGF and RC6 changes for MTL
This series includes the code changes to get CAGF, RC State and C6 Residency of MTL. v3: Included "Use GEN12 RPSTAT register" patch v4: - Rebased - Dropped "Use GEN12 RPSTAT register" patch from this series going to send separate series for it v5: - Included "drm/i915/gt: Change RC6 residency functions to accept register ID's" based on code review feedback v6: - Addressed Jani N's review comments on "drm/i915/gt: Change RC6 residency functions to accept register ID's" - Re-add "drm/i915: Use GEN12_RPSTAT register for GT freq" to this series v7: Rebuild, identical to v6 v8: - Add "drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf" to the series (based on Rodrigo's review) to consistently use REG_FIELD_GET - Minor changes to other patches, please see individual patches for changelogs v9: Rebuild, identical to v8 v10: Address review comments from Rodrigo on Patch 5 Ashutosh Dixit (2): drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf drm/i915/gt: Use RC6 residency types as arguments to residency functions Badal Nilawar (2): drm/i915/mtl: Modify CAGF functions for MTL drm/i915/mtl: C6 residency and C state type for MTL SAMedia Don Hiatt (1): drm/i915: Use GEN12_RPSTAT register for GT freq drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 88 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 22 +++-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 +-- drivers/gpu/drm/i915/gt/intel_rc6.c | 64 +- drivers/gpu/drm/i915/gt/intel_rc6.h | 11 ++- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 +++- drivers/gpu/drm/i915/gt/intel_rps.c | 51 --- drivers/gpu/drm/i915/gt/intel_rps.h | 2 + drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 9 +- 10 files changed, 198 insertions(+), 82 deletions(-) -- 2.38.0
[PATCH 1/5] drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf
Instead of masks/shifts settle on REG_FIELD_GET as the standard way to extract reg fields. This allows future patches touching this code to also consistently use REG_FIELD_GET and friends. Suggested-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 12 drivers/gpu/drm/i915/gt/intel_rps.c | 11 +-- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 40d0a3be42acf..979e602946549 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -307,7 +307,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> MEMSTAT_VID_SHIFT); drm_printf(p, "Current P-state: %d\n", - (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); + REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rgvstat)); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { u32 rpmodectl, freq_sts; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 36d95b79022c0..35c039573294c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -794,12 +794,9 @@ #define GEN6_RP_DOWN_TIMEOUT _MMIO(0xa010) #define GEN6_RP_INTERRUPT_LIMITS _MMIO(0xa014) #define GEN6_RPSTAT1 _MMIO(0xa01c) -#define GEN6_CAGF_SHIFT 8 -#define HSW_CAGF_SHIFT 7 -#define GEN9_CAGF_SHIFT 23 -#define GEN6_CAGF_MASK (0x7f << GEN6_CAGF_SHIFT) -#define HSW_CAGF_MASK(0x7f << HSW_CAGF_SHIFT) -#define GEN9_CAGF_MASK (0x1ff << GEN9_CAGF_SHIFT) +#define GEN6_CAGF_MASK REG_GENMASK(14, 8) +#define HSW_CAGF_MASKREG_GENMASK(13, 7) +#define GEN9_CAGF_MASK REG_GENMASK(31, 23) #define GEN6_RP_CONTROL_MMIO(0xa024) #define GEN6_RP_MEDIA_TURBO (1 << 11) #define GEN6_RP_MEDIA_MODE_MASK (3 << 9) @@ -1370,8 +1367,7 @@ #define MEMSTAT_ILK_MMIO(0x111f8) #define MEMSTAT_VID_MASK 0x7f00 #define MEMSTAT_VID_SHIFT8 -#define MEMSTAT_PSTATE_MASK 0x00f8 -#define MEMSTAT_PSTATE_SHIFT 3 +#define MEMSTAT_PSTATE_MASK REG_GENMASK(7, 3) #define MEMSTAT_MON_ACTV (1 << 2) #define MEMSTAT_SRC_CTL_MASK 0x0003 #define MEMSTAT_SRC_CTL_CORE 0 diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 070005dd0da47..5061b5b3cece7 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2072,16 +2072,15 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) u32 cagf; if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - cagf = (rpstat >> 8) & 0xff; + cagf = REG_FIELD_GET(RPE_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 6) - cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat); else - cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >> - MEMSTAT_PSTATE_SHIFT); + cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat)); return cagf; } -- 2.38.0
[PATCH 1/5] drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf
Instead of masks/shifts settle on REG_FIELD_GET as the standard way to extract reg fields. This allows future patches touching this code to also consistently use REG_FIELD_GET and friends. Suggested-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 12 drivers/gpu/drm/i915/gt/intel_rps.c | 11 +-- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 40d0a3be42acf..979e602946549 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -307,7 +307,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p) drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >> MEMSTAT_VID_SHIFT); drm_printf(p, "Current P-state: %d\n", - (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT); + REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rgvstat)); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { u32 rpmodectl, freq_sts; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 36d95b79022c0..35c039573294c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -794,12 +794,9 @@ #define GEN6_RP_DOWN_TIMEOUT _MMIO(0xa010) #define GEN6_RP_INTERRUPT_LIMITS _MMIO(0xa014) #define GEN6_RPSTAT1 _MMIO(0xa01c) -#define GEN6_CAGF_SHIFT 8 -#define HSW_CAGF_SHIFT 7 -#define GEN9_CAGF_SHIFT 23 -#define GEN6_CAGF_MASK (0x7f << GEN6_CAGF_SHIFT) -#define HSW_CAGF_MASK(0x7f << HSW_CAGF_SHIFT) -#define GEN9_CAGF_MASK (0x1ff << GEN9_CAGF_SHIFT) +#define GEN6_CAGF_MASK REG_GENMASK(14, 8) +#define HSW_CAGF_MASKREG_GENMASK(13, 7) +#define GEN9_CAGF_MASK REG_GENMASK(31, 23) #define GEN6_RP_CONTROL_MMIO(0xa024) #define GEN6_RP_MEDIA_TURBO (1 << 11) #define GEN6_RP_MEDIA_MODE_MASK (3 << 9) @@ -1370,8 +1367,7 @@ #define MEMSTAT_ILK_MMIO(0x111f8) #define MEMSTAT_VID_MASK 0x7f00 #define MEMSTAT_VID_SHIFT8 -#define MEMSTAT_PSTATE_MASK 0x00f8 -#define MEMSTAT_PSTATE_SHIFT 3 +#define MEMSTAT_PSTATE_MASK REG_GENMASK(7, 3) #define MEMSTAT_MON_ACTV (1 << 2) #define MEMSTAT_SRC_CTL_MASK 0x0003 #define MEMSTAT_SRC_CTL_CORE 0 diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2a..5bd6671554a6e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2074,16 +2074,15 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) u32 cagf; if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - cagf = (rpstat >> 8) & 0xff; + cagf = REG_FIELD_GET(RPE_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 9) - cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; + cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) - cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT; + cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 6) - cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT; + cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat); else - cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >> - MEMSTAT_PSTATE_SHIFT); + cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rpstat)); return cagf; } -- 2.38.0
[PATCH 5/5] drm/i915/mtl: C6 residency and C state type for MTL SAMedia
From: Badal Nilawar Add support for C6 residency and C state type for MTL SAMedia. Also add mtl_drpc. v2: Fixed review comments (Ashutosh) v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R) Remove MTL_CC_SHIFT (Ashutosh) Adapt to RC6 residency register code refactor (Jani N) v4: Move MTL branch to top in drpc_show v5: Use FORCEWAKE_MT identical to gen6_drpc (Ashutosh) Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 58 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 ++ drivers/gpu/drm/i915/gt/intel_rc6.c | 17 -- 3 files changed, 75 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 5d6b346831393..f15a7486a9866 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -256,6 +256,60 @@ static int ilk_drpc(struct seq_file *m) return 0; } +static int mtl_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, mt_fwake_req; + u32 mtl_powergate_enable = 0, mtl_powergate_status = 0; + + mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT); + gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + mtl_powergate_status = intel_uncore_read(uncore, +GEN9_PWRGT_DOMAIN_STATUS); + + seq_printf(m, "RC6 Enabled: %s\n", + str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (gt->type == GT_MEDIA) { + seq_printf(m, "Media Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } else { + seq_printf(m, "Render Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_RENDER_PG_ENABLE)); + } + + seq_puts(m, "Current RC state: "); + switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) { + case MTL_CC0: + seq_puts(m, "on\n"); + break; + case MTL_CC6: + seq_puts(m, "RC6\n"); + break; + default: + seq_puts(m, "Unknown\n"); + break; + } + + seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req); + if (gt->type == GT_MEDIA) + seq_printf(m, "Media Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + else + seq_printf(m, "Render Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + + /* Works for both render and media gt's */ + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + + return fw_domains_show(m, NULL); +} + static int drpc_show(struct seq_file *m, void *unused) { struct intel_gt *gt = m->private; @@ -264,7 +318,9 @@ static int drpc_show(struct seq_file *m, void *unused) int err = -ENODEV; with_intel_runtime_pm(gt->uncore->rpm, wakeref) { - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + err = mtl_drpc(m); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_drpc(m); else if (GRAPHICS_VER(i915) >= 6) err = gen6_drpc(m); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index d8dbd0ac3b064..a0ddaf243593c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -24,6 +24,9 @@ /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ #define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) #define MTL_CAGF_MASKREG_GENMASK(8, 0) +#define MTL_CC0 0x0 +#define MTL_CC6 0x3 +#define MTL_CC_MASK REG_GENMASK(12, 9) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) @@ -1512,6 +1515,8 @@ #define FORCEWAKE_MEDIA_VLV_MMIO(0x1300b8) #define FORCEWAKE_ACK_MEDIA_VLV_MMIO(0x1300bc) +#define MTL_MEDIA_MC6
[PATCH 4/5] drm/i915/gt: Use RC6 residency types as arguments to residency functions
Previously RC6 residency functions directly accepted RC6 residency register MMIO offsets (there are four RC6 residency registers). This worked but required an assumption on the residency register layout so was not future proof. Therefore change RC6 residency functions to accept RC6 residency types instead of register MMIO offsets. The knowledge of register offsets as well as ID to offset mapping is now maintained solely in intel_rc6 and can be tailored for different platforms and different register layouts as need arises. v2: Address review comments by Jani N - Change residency functions to accept RC6 residency types instead of register ID's - s/intel_rc6_print_rc5_res/intel_rc6_print_residency/ - Remove "const enum" in function arguments - Naming: intel_rc6_* for enum - Use INTEL_RC6_RES_MAX and other minor changes v3: Don't include intel_rc6_types.h in intel_rc6.h (Jani) Suggested-by: Rodrigo Vivi Suggested-by: Jani Nikula Reported-by: Jani Nikula Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 ++-- drivers/gpu/drm/i915/gt/intel_rc6.c | 55 +++ drivers/gpu/drm/i915/gt/intel_rc6.h | 11 ++-- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 - drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 6 +- 7 files changed, 72 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 979e602946549..5d6b346831393 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data) } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); -static void print_rc6_res(struct seq_file *m, - const char *title, - const i915_reg_t reg) -{ - struct intel_gt *gt = m->private; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - seq_printf(m, "%s %u (%llu us)\n", title, - intel_uncore_read(gt->uncore, reg), - intel_rc6_residency_us(>rc6, reg)); -} - static int vlv_drpc(struct seq_file *m) { struct intel_gt *gt = m->private; @@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m) seq_printf(m, "Media Power Well: %s\n", (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); - print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + intel_rc6_print_residency(m, "Render RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "Media RC6 residency since boot:", INTEL_RC6_RES_VLV_MEDIA); return fw_domains_show(m, NULL); } @@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m) } /* Not exactly sure what this is */ - print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", - GEN6_GT_GFX_RC6_LOCKED); - print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); - print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since boot:", + INTEL_RC6_RES_RC6_LOCKED); + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "RC6+ residency since boot:", INTEL_RC6_RES_RC6p); + intel_rc6_print_residency(m, "RC6++ residency since boot:", INTEL_RC6_RES_RC6pp); if (GRAPHICS_VER(i915) <= 7) { seq_printf(m, "RC6 voltage: %dmV\n", diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 9041609523697..19a6e052c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) #ifdef CONFIG_PM -static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) +static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id) { intel_wakeref_t wakeref; u64 res = 0; with_intel_runtime_pm(gt->uncore->rpm, wakeref) - res = intel_rc6_residency_us(>rc6, reg); + res = i
[PATCH 2/5] drm/i915: Use GEN12_RPSTAT register for GT freq
From: Don Hiatt On GEN12+ use GEN12_RPSTAT register to get actual resolved GT freq. GEN12_RPSTAT does not require a forcewake and will return 0 freq if GT is in RC6. v2: - Fixed review comments(Ashutosh) - Added function intel_rps_read_rpstat_fw to read RPSTAT without forcewake, required especially for GEN6_RPSTAT1 (Ashutosh, Tvrtko) v3: - Updated commit title and message for more clarity (Ashutosh) - Replaced intel_rps_read_rpstat with direct read to GEN12_RPSTAT1 in read_cagf (Ashutosh) v4: Remove GEN12_CAGF_SHIFT and use REG_FIELD_GET (Rodrigo) Cc: Don Hiatt Cc: Andi Shyti Signed-off-by: Don Hiatt Signed-off-by: Badal Nilawar Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_rps.c | 32 + drivers/gpu/drm/i915/gt/intel_rps.h | 2 ++ drivers/gpu/drm/i915/i915_pmu.c | 3 +-- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 35c039573294c..f8c4f758ac0b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1539,6 +1539,7 @@ #define GEN12_RPSTAT1 _MMIO(0x1381b4) #define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) +#define GEN12_CAGF_MASK REG_GENMASK(19, 11) #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 5bd6671554a6e..da6b969f554b6 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2068,12 +2068,34 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } +u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); +} + +u32 intel_rps_read_rpstat(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); +} + u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER(i915) >= 12) + cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = REG_FIELD_GET(RPE_MASK, rpstat); else if (GRAPHICS_VER(i915) >= 9) cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat); @@ -2093,7 +2115,9 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + if (GRAPHICS_VER(i915) >= 12) { + freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); @@ -2259,7 +2283,7 @@ static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpstat = intel_rps_read_rpstat(rps); rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; @@ -2394,7 +2418,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) drm_printf(p, "PM MASK=0x%08x\n", pm_mask); drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", rps->pm_intrmsk_mbz); - drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1)); + drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps)); drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, caps.min_freq)); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 110300dfd4383..9e1cad9ba0e9c 100644 --- a/drivers/gpu/drm/i915/gt/intel_r
[PATCH 3/5] drm/i915/mtl: Modify CAGF functions for MTL
From: Badal Nilawar Update CAGF functions for MTL to get actual resolved frequency of 3D and SAMedia. v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR) Move MTL branches in cagf functions to top (MattR) Fix commit message (Andi) v3: Added comment about registers not needing forcewake for Gen12+ and returning 0 freq in RC6 v4: Use REG_FIELD_GET and uncore (Rodrigo) Bspec: 66300 Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index f8c4f758ac0b1..d8dbd0ac3b064 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -21,6 +21,10 @@ */ #define PERF_REG(offset) _MMIO(offset) +/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ +#define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) +#define MTL_CAGF_MASKREG_GENMASK(8, 0) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3 diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index da6b969f554b6..63cc7c538401e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2093,7 +2093,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat); + else if (GRAPHICS_VER(i915) >= 12) cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat); else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = REG_FIELD_GET(RPE_MASK, rpstat); @@ -2115,7 +2117,13 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (GRAPHICS_VER(i915) >= 12) { + /* +* For Gen12+ reading freq from HW does not need a forcewake and +* registers will return 0 freq when GT is in RC6 +*/ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + } else if (GRAPHICS_VER(i915) >= 12) { freq = intel_uncore_read(uncore, GEN12_RPSTAT1); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); -- 2.38.0
[PATCH 0/5] i915: CAGF and RC6 changes for MTL
This series includes the code changes to get CAGF, RC State and C6 Residency of MTL. v3: Included "Use GEN12 RPSTAT register" patch v4: - Rebased - Dropped "Use GEN12 RPSTAT register" patch from this series going to send separate series for it v5: - Included "drm/i915/gt: Change RC6 residency functions to accept register ID's" based on code review feedback v6: - Addressed Jani N's review comments on "drm/i915/gt: Change RC6 residency functions to accept register ID's" - Re-add "drm/i915: Use GEN12_RPSTAT register for GT freq" to this series v7: Rebuild, identical to v6 v8: - Add "drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf" to the series (based on Rodrigo's review) to consistently use REG_FIELD_GET - Minor changes to other patches, please see individual patches for changelogs Ashutosh Dixit (2): drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf drm/i915/gt: Use RC6 residency types as arguments to residency functions Badal Nilawar (2): drm/i915/mtl: Modify CAGF functions for MTL drm/i915/mtl: C6 residency and C state type for MTL SAMedia Don Hiatt (1): drm/i915: Use GEN12_RPSTAT register for GT freq drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 87 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 22 +++-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 +-- drivers/gpu/drm/i915/gt/intel_rc6.c | 64 +- drivers/gpu/drm/i915/gt/intel_rc6.h | 11 ++- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 +++- drivers/gpu/drm/i915/gt/intel_rps.c | 51 --- drivers/gpu/drm/i915/gt/intel_rps.h | 2 + drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 9 +- 10 files changed, 197 insertions(+), 82 deletions(-) -- 2.38.0
[PATCH 2/4] drm/i915/mtl: Modify CAGF functions for MTL
From: Badal Nilawar Update CAGF functions for MTL to get actual resolved frequency of 3D and SAMedia. v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR) Move MTL branches in cagf functions to top (MattR) Fix commit message (Andi) v3: Added comment about registers not needing forcewake for Gen12+ and returning 0 freq in RC6 Bspec: 66300 Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index a7a0129d0e3fc..b4b1b54ad738f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -21,6 +21,10 @@ */ #define PERF_REG(offset) _MMIO(offset) +/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ +#define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) +#define MTL_CAGF_MASKREG_GENMASK(8, 0) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3 diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index df21258976d86..5a743ae4dd11e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2093,7 +2093,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + cagf = rpstat & MTL_CAGF_MASK; + else if (GRAPHICS_VER(i915) >= 12) cagf = (rpstat & GEN12_CAGF_MASK) >> GEN12_CAGF_SHIFT; else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = (rpstat >> 8) & 0xff; @@ -2116,7 +2118,13 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (GRAPHICS_VER(i915) >= 12) { + /* +* For Gen12+ reading freq from HW does not need a forcewake and +* registers will return 0 freq when GT is in RC6 +*/ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + freq = intel_uncore_read(rps_to_gt(rps)->uncore, MTL_MIRROR_TARGET_WP1); + } else if (GRAPHICS_VER(i915) >= 12) { freq = intel_uncore_read(uncore, GEN12_RPSTAT1); } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); -- 2.38.0
[PATCH 4/4] drm/i915/mtl: C6 residency and C state type for MTL SAMedia
From: Badal Nilawar Add support for C6 residency and C state type for MTL SAMedia. Also add mtl_drpc. v2: Fixed review comments (Ashutosh) v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R) Remove MTL_CC_SHIFT (Ashutosh) Adapt to RC6 residency register code refactor (Jani N) v4: Move MTL branch to top in drpc_show Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 60 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 ++ drivers/gpu/drm/i915/gt/intel_rc6.c | 17 -- 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 235d593cfaeba..c88d8ec62b692 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -256,6 +256,62 @@ static int ilk_drpc(struct seq_file *m) return 0; } +static int mtl_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, global_forcewake; + u32 mtl_powergate_enable = 0, mtl_powergate_status = 0; + + gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + + global_forcewake = intel_uncore_read(uncore, FORCEWAKE_GT_GEN9); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + mtl_powergate_status = intel_uncore_read(uncore, +GEN9_PWRGT_DOMAIN_STATUS); + + seq_printf(m, "RC6 Enabled: %s\n", + str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (gt->type == GT_MEDIA) { + seq_printf(m, "Media Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } else { + seq_printf(m, "Render Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_RENDER_PG_ENABLE)); + } + + seq_puts(m, "Current RC state: "); + switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) { + case MTL_CC0: + seq_puts(m, "on\n"); + break; + case MTL_CC6: + seq_puts(m, "RC6\n"); + break; + default: + seq_puts(m, "Unknown\n"); + break; + } + + if (gt->type == GT_MEDIA) + seq_printf(m, "Media Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + else + seq_printf(m, "Render Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + + /* Works for both render and media gt's */ + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + + seq_printf(m, "Global Forcewake Requests: 0x%x\n", global_forcewake); + + return fw_domains_show(m, NULL); +} + static int drpc_show(struct seq_file *m, void *unused) { struct intel_gt *gt = m->private; @@ -264,7 +320,9 @@ static int drpc_show(struct seq_file *m, void *unused) int err = -ENODEV; with_intel_runtime_pm(gt->uncore->rpm, wakeref) { - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + err = mtl_drpc(m); + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_drpc(m); else if (GRAPHICS_VER(i915) >= 6) err = gen6_drpc(m); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index b4b1b54ad738f..9f168867eb8ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -24,6 +24,9 @@ /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ #define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) #define MTL_CAGF_MASKREG_GENMASK(8, 0) +#define MTL_CC0 0x0 +#define MTL_CC6 0x3 +#define MTL_CC_MASK REG_GENMASK(12, 9) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) @@ -1516,6 +1519,8 @@ #define FORCEWAKE_MEDIA_VLV_MMIO(0x1300b8) #define FORCEWAKE_ACK_MEDIA_VLV_MMIO(0x1300bc) +#define MTL_MEDIA_MC6 _MMIO(0x138048) + #define GEN6_
[PATCH 0/4] i915: CAGF and RC6 changes for MTL
This series includes the code changes to get CAGF, RC State and C6 Residency of MTL. v3: Included "Use GEN12 RPSTAT register" patch v4: - Rebased - Dropped "Use GEN12 RPSTAT register" patch from this series going to send separate series for it v5: - Included "drm/i915/gt: Change RC6 residency functions to accept register ID's" based on code review feedback v6: - Addressed Jani N's review comments on "drm/i915/gt: Change RC6 residency functions to accept register ID's" - Minor changes to other patches, please see individual patches for changelogs Ashutosh Dixit (1): drm/i915/gt: Use RC6 residency types as arguments to residency functions Badal Nilawar (2): drm/i915/mtl: Modify CAGF functions for MTL drm/i915/mtl: C6 residency and C state type for MTL SAMedia Don Hiatt (1): drm/i915: Use GEN12_RPSTAT register for GT freq drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 87 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 11 +++ drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 +-- drivers/gpu/drm/i915/gt/intel_rc6.c | 64 +- drivers/gpu/drm/i915/gt/intel_rc6.h | 8 +- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 +++- drivers/gpu/drm/i915/gt/intel_rps.c | 40 - drivers/gpu/drm/i915/gt/intel_rps.h | 2 + drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 9 +- 10 files changed, 188 insertions(+), 66 deletions(-) -- 2.38.0
[PATCH 1/4] drm/i915: Use GEN12_RPSTAT register for GT freq
From: Don Hiatt On GEN12+ use GEN12_RPSTAT register to get actual resolved GT freq. GEN12_RPSTAT does not require a forcewake and will return 0 freq if GT is in RC6. v2: - Fixed review comments(Ashutosh) - Added function intel_rps_read_rpstat_fw to read RPSTAT without forcewake, required especially for GEN6_RPSTAT1 (Ashutosh, Tvrtko) v3: - Updated commit title and message for more clarity (Ashutosh) - Replaced intel_rps_read_rpstat with direct read to GEN12_RPSTAT1 in read_cagf (Ashutosh) Cc: Don Hiatt Cc: Andi Shyti Signed-off-by: Don Hiatt Signed-off-by: Badal Nilawar Signed-off-by: Ashutosh Dixit Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ drivers/gpu/drm/i915/gt/intel_rps.c | 32 + drivers/gpu/drm/i915/gt/intel_rps.h | 2 ++ drivers/gpu/drm/i915/i915_pmu.c | 3 +-- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 36d95b79022c0..a7a0129d0e3fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1543,6 +1543,8 @@ #define GEN12_RPSTAT1 _MMIO(0x1381b4) #define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) +#define GEN12_CAGF_SHIFT 11 +#define GEN12_CAGF_MASK REG_GENMASK(19, 11) #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2a..df21258976d86 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2068,12 +2068,34 @@ void intel_rps_sanitize(struct intel_rps *rps) rps_disable_interrupts(rps); } +u32 intel_rps_read_rpstat_fw(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat); +} + +u32 intel_rps_read_rpstat(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + i915_reg_t rpstat; + + rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1; + + return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat); +} + u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) { struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER(i915) >= 12) + cagf = (rpstat & GEN12_CAGF_MASK) >> GEN12_CAGF_SHIFT; + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = (rpstat >> 8) & 0xff; else if (GRAPHICS_VER(i915) >= 9) cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; @@ -2094,7 +2116,9 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + if (GRAPHICS_VER(i915) >= 12) { + freq = intel_uncore_read(uncore, GEN12_RPSTAT1); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); @@ -2260,7 +2284,7 @@ static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p) rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD); rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD); - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1); + rpstat = intel_rps_read_rpstat(rps); rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK; rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK; rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK; @@ -2395,7 +2419,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p) drm_printf(p, "PM MASK=0x%08x\n", pm_mask); drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n", rps->pm_intrmsk_mbz); - drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1)); + drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps)); drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps)); drm_printf(p, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, caps.min_freq)); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 110300dfd4383..9e1cad9ba0e9c 100644 --- a/d
[PATCH 3/4] drm/i915/gt: Use RC6 residency types as arguments to residency functions
Previously RC6 residency functions directly accepted RC6 residency register MMIO offsets (there are four RC6 residency registers). This worked but required an assumption on the residency register layout so was not future proof. Therefore change RC6 residency functions to accept RC6 residency types instead of register MMIO offsets. The knowledge of register offsets as well as ID to offset mapping is now maintained solely in intel_rc6 and can be tailored for different platforms and different register layouts as need arises. v2: Address review comments by Jani N - Change residency functions to accept RC6 residency types instead of register ID's - s/intel_rc6_print_rc5_res/intel_rc6_print_residency/ - Remove "const enum" in function arguments - Naming: intel_rc6_* for enum - Use INTEL_RC6_RES_MAX and other minor changes Suggested-by: Rodrigo Vivi Suggested-by: Jani Nikula Reported-by: Jani Nikula Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 ++-- drivers/gpu/drm/i915/gt/intel_rc6.c | 55 +++ drivers/gpu/drm/i915/gt/intel_rc6.h | 8 ++- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 - drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 6 +- 7 files changed, 70 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 40d0a3be42acf..235d593cfaeba 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data) } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); -static void print_rc6_res(struct seq_file *m, - const char *title, - const i915_reg_t reg) -{ - struct intel_gt *gt = m->private; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - seq_printf(m, "%s %u (%llu us)\n", title, - intel_uncore_read(gt->uncore, reg), - intel_rc6_residency_us(>rc6, reg)); -} - static int vlv_drpc(struct seq_file *m) { struct intel_gt *gt = m->private; @@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m) seq_printf(m, "Media Power Well: %s\n", (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); - print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + intel_rc6_print_residency(m, "Render RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "Media RC6 residency since boot:", INTEL_RC6_RES_VLV_MEDIA); return fw_domains_show(m, NULL); } @@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m) } /* Not exactly sure what this is */ - print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", - GEN6_GT_GFX_RC6_LOCKED); - print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); - print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since boot:", + INTEL_RC6_RES_RC6_LOCKED); + intel_rc6_print_residency(m, "RC6 residency since boot:", INTEL_RC6_RES_RC6); + intel_rc6_print_residency(m, "RC6+ residency since boot:", INTEL_RC6_RES_RC6p); + intel_rc6_print_residency(m, "RC6++ residency since boot:", INTEL_RC6_RES_RC6pp); if (GRAPHICS_VER(i915) <= 7) { seq_printf(m, "RC6 voltage: %dmV\n", diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 9041609523697..19a6e052c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) #ifdef CONFIG_PM -static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) +static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id) { intel_wakeref_t wakeref; u64 res = 0; with_intel_runtime_pm(gt->uncore->rpm, wakeref) - res = intel_rc6_residency_us(>rc6, reg); + res = intel_rc6_residency_us(>rc6, id); retur
[PATCH 3/3] drm/i915/mtl: C6 residency and C state type for MTL SAMedia
From: Badal Nilawar Add support for C6 residency and C state type for MTL SAMedia. Also add mtl_drpc. v2: Fixed review comments (Ashutosh) v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R) Remove MTL_CC_SHIFT (Ashutosh) Adapt to RC6 residency register code refactor (Jani N) Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 57 +++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 5 ++ drivers/gpu/drm/i915/gt/intel_rc6.c | 17 -- 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 1fb053cbf52db..3a9bb4387248e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -256,6 +256,61 @@ static int ilk_drpc(struct seq_file *m) return 0; } +static int mtl_drpc(struct seq_file *m) +{ + struct intel_gt *gt = m->private; + struct intel_uncore *uncore = gt->uncore; + u32 gt_core_status, rcctl1, global_forcewake; + u32 mtl_powergate_enable = 0, mtl_powergate_status = 0; + + gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1); + + global_forcewake = intel_uncore_read(uncore, FORCEWAKE_GT_GEN9); + + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL); + mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + mtl_powergate_status = intel_uncore_read(uncore, +GEN9_PWRGT_DOMAIN_STATUS); + + seq_printf(m, "RC6 Enabled: %s\n", + str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE)); + if (gt->type == GT_MEDIA) { + seq_printf(m, "Media Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_MEDIA_PG_ENABLE)); + } else { + seq_printf(m, "Render Well Gating Enabled: %s\n", + str_yes_no(mtl_powergate_enable & GEN9_RENDER_PG_ENABLE)); + } + + seq_puts(m, "Current RC state: "); + switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) { + case MTL_CC0: + seq_puts(m, "on\n"); + break; + case MTL_CC6: + seq_puts(m, "RC6\n"); + break; + default: + seq_puts(m, "Unknown\n"); + break; + } + + if (gt->type == GT_MEDIA) + seq_printf(m, "Media Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down"); + else + seq_printf(m, "Render Power Well: %s\n", + (mtl_powergate_status & + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down"); + + intel_rc6_print_rc6_res(m, "RC6 residency since boot:", RC6_RES_REG_RC6); + + seq_printf(m, "Global Forcewake Requests: 0x%x\n", global_forcewake); + + return fw_domains_show(m, NULL); +} + static int drpc_show(struct seq_file *m, void *unused) { struct intel_gt *gt = m->private; @@ -266,6 +321,8 @@ static int drpc_show(struct seq_file *m, void *unused) with_intel_runtime_pm(gt->uncore->rpm, wakeref) { if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_drpc(m); + else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + err = mtl_drpc(m); else if (GRAPHICS_VER(i915) >= 6) err = gen6_drpc(m); else diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 66867747f643e..0493ea324b846 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -11,6 +11,9 @@ /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ #define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) #define MTL_CAGF_MASKREG_GENMASK(8, 0) +#define MTL_CC0 0x0 +#define MTL_CC6 0x3 +#define MTL_CC_MASK REG_GENMASK(12, 9) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) @@ -1494,6 +1497,8 @@ #define FORCEWAKE_MEDIA_VLV_MMIO(0x1300b8) #define FORCEWAKE_ACK_MEDIA_VLV_MMIO(0x1300bc) +#define MTL_MEDIA_MC6 _MMIO(0x138048) + #define GEN6_GT_THREAD_STATUS_REG _MMIO(0x13805c) #define GEN6_GT_THREAD_STATUS_CORE_MASK 0x7 diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c
[PATCH 2/3] drm/i915/mtl: Modify CAGF functions for MTL
From: Badal Nilawar Update CAGF functions for MTL to get actual resolved frequency of 3D and SAMedia. v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR) Move MTL branches in cagf functions to top (MattR) Fix commit message (Andi) Bspec: 66300 Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 drivers/gpu/drm/i915/gt/intel_rps.c | 8 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 7f79bbf978284..66867747f643e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -8,6 +8,10 @@ #include "i915_reg_defs.h" +/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ +#define MTL_MIRROR_TARGET_WP1 _MMIO(0xc60) +#define MTL_CAGF_MASKREG_GENMASK(8, 0) + /* RPM unit config (Gen8+) */ #define RPM_CONFIG0_MMIO(0xd00) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3 diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fc23c562d9b2a..59ca7e80e4c6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2073,7 +2073,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat) struct drm_i915_private *i915 = rps_to_i915(rps); u32 cagf; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + cagf = rpstat & MTL_CAGF_MASK; + else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) cagf = (rpstat >> 8) & 0xff; else if (GRAPHICS_VER(i915) >= 9) cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT; @@ -2094,7 +2096,9 @@ static u32 read_cagf(struct intel_rps *rps) struct intel_uncore *uncore = rps_to_uncore(rps); u32 freq; - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { + freq = intel_uncore_read(rps_to_gt(rps)->uncore, MTL_MIRROR_TARGET_WP1); + } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { vlv_punit_get(i915); freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); vlv_punit_put(i915); -- 2.38.0
[PATCH 0/3] i915: CAGF and RC6 changes for MTL
This series includes the code changes to get CAGF, RC State and C6 Residency of MTL. v2: Included "Use GEN12 RPSTAT register" patch v3: - Rebased - Dropped "Use GEN12 RPSTAT register" patch from this series going to send separate series for it v4: - Included "drm/i915/gt: Change RC6 residency functions to accept register ID's" based on code review feedback - Addressed review comments, please see individual patches for changelogs Ashutosh Dixit (1): drm/i915/gt: Change RC6 residency functions to accept register ID's Badal Nilawar (2): drm/i915/mtl: Modify CAGF functions for MTL drm/i915/mtl: C6 residency and C state type for MTL SAMedia drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 84 ++- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 9 ++ drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 +-- drivers/gpu/drm/i915/gt/intel_rc6.c | 65 +- drivers/gpu/drm/i915/gt/intel_rc6.h | 9 +- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 10 +++ drivers/gpu/drm/i915/gt/intel_rps.c | 8 +- drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 6 +- 9 files changed, 150 insertions(+), 59 deletions(-) -- 2.38.0
[PATCH 1/3] drm/i915/gt: Change RC6 residency functions to accept register ID's
Previously RC6 residency functions directly accepted RC6 residency register MMIO offsets (there are four RC6 residency registers). This worked but required an assumption on the residency register layout so was not future proof. Therefore change RC6 residency functions to accept register ID's instead of register MMIO offsets. The knowledge of register offsets as well as ID to offset mapping is now maintained solely in intel_rc6 and can be tailored for different platforms and different register layouts as need arises. Suggested-by: Rodrigo Vivi Reported-by: Jani Nikula Signed-off-by: Ashutosh Dixit --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++-- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 12 ++-- drivers/gpu/drm/i915/gt/intel_rc6.c | 56 +++ drivers/gpu/drm/i915/gt/intel_rc6.h | 9 ++- drivers/gpu/drm/i915/gt/intel_rc6_types.h | 10 drivers/gpu/drm/i915/gt/selftest_rc6.c| 6 +- drivers/gpu/drm/i915/i915_pmu.c | 6 +- 7 files changed, 69 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 40d0a3be42acf..1fb053cbf52db 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data) } DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains); -static void print_rc6_res(struct seq_file *m, - const char *title, - const i915_reg_t reg) -{ - struct intel_gt *gt = m->private; - intel_wakeref_t wakeref; - - with_intel_runtime_pm(gt->uncore->rpm, wakeref) - seq_printf(m, "%s %u (%llu us)\n", title, - intel_uncore_read(gt->uncore, reg), - intel_rc6_residency_us(>rc6, reg)); -} - static int vlv_drpc(struct seq_file *m) { struct intel_gt *gt = m->private; @@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m) seq_printf(m, "Media Power Well: %s\n", (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down"); - print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6); + intel_rc6_print_rc6_res(m, "Render RC6 residency since boot:", RC6_RES_REG_RC6); + intel_rc6_print_rc6_res(m, "Media RC6 residency since boot:", VLV_RC6_RES_REG_MEDIA_RC6); return fw_domains_show(m, NULL); } @@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m) } /* Not exactly sure what this is */ - print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", - GEN6_GT_GFX_RC6_LOCKED); - print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6); - print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p); - print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp); + intel_rc6_print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:", + RC6_RES_REG_RC6_LOCKED); + intel_rc6_print_rc6_res(m, "RC6 residency since boot:", RC6_RES_REG_RC6); + intel_rc6_print_rc6_res(m, "RC6+ residency since boot:", RC6_RES_REG_RC6p); + intel_rc6_print_rc6_res(m, "RC6++ residency since boot:", RC6_RES_REG_RC6pp); if (GRAPHICS_VER(i915) <= 7) { seq_printf(m, "RC6 voltage: %dmV\n", diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 9041609523697..1ce5bfdc72282 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) #ifdef CONFIG_PM -static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) +static u32 get_residency(struct intel_gt *gt, const enum rc6_res_reg id) { intel_wakeref_t wakeref; u64 res = 0; with_intel_runtime_pm(gt->uncore->rpm, wakeref) - res = intel_rc6_residency_us(>rc6, reg); + res = intel_rc6_residency_us(>rc6, id); return DIV_ROUND_CLOSEST_ULL(res, 1000); } @@ -123,7 +123,7 @@ static ssize_t rc6_enable_show(struct device *dev, static u32 __rc6_residency_ms_show(struct intel_gt *gt) { - return get_residency(gt, GEN6_GT_GFX_RC6); + return get_residency(gt, RC6_RES_REG_RC6); } static ssize_t rc6_residency_ms_show(struct device *dev, @@ -138,7 +138,7 @@ static ssize_t rc6_residency_ms_show(struct dev
[PATCH 0/7] drm/i915: Add HWMON support
This series adds the HWMON support for DGFX Test-with: 20221013151400.2086268-1-ashutosh.di...@intel.com v2: - Reorganized series. Created first patch as infrastructure patch followed by feature patches. (Ashutosh) - Fixed review comments (Jani) - Fixed review comments (Ashutosh) v3: - Fixed review comments from Guenter - Exposed energy inferface as standard hwmon interface (Ashutosh) - For power interface added entries for critical power and maintained standard interface for all the entries except power1_max_interval - Extended support for XEHPSDV (Ashutosh) v4: - Fixed review comment from Guenter - Cleaned up unused code v5: - Fixed review comments (Jani) v6: - Fixed review comments (Ashutosh) - Updated date and kernel version in documentation v7: - Fixed review comments (Anshuman) - KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko) v8: s/hwmon_device_register_with_info/ devm_hwmon_device_register_with_info/ (Ashutosh) v9: Addressed review comments from Rodrigo and Andi Ashutosh Dixit (2): drm/i915/hwmon: Expose card reactive critical power drm/i915/hwmon: Expose power1_max_interval Dale B Stimson (4): drm/i915/hwmon: Add HWMON infrastructure drm/i915/hwmon: Power PL1 limit and TDP setting drm/i915/hwmon: Show device level energy usage drm/i915/hwmon: Extend power/energy for XEHPSDV Riana Tauro (1): drm/i915/hwmon: Add HWMON current voltage support .../ABI/testing/sysfs-driver-intel-i915-hwmon | 75 ++ MAINTAINERS | 1 + drivers/gpu/drm/i915/Makefile | 3 + drivers/gpu/drm/i915/gt/intel_gt_regs.h | 8 + drivers/gpu/drm/i915/i915_driver.c| 5 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_hwmon.c | 738 ++ drivers/gpu/drm/i915/i915_hwmon.h | 20 + drivers/gpu/drm/i915/i915_reg.h | 6 + drivers/gpu/drm/i915/intel_mchbar_regs.h | 21 + 10 files changed, 879 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon create mode 100644 drivers/gpu/drm/i915/i915_hwmon.c create mode 100644 drivers/gpu/drm/i915/i915_hwmon.h -- 2.38.0
[PATCH 1/7] drm/i915/hwmon: Add HWMON infrastructure
From: Dale B Stimson The i915 HWMON module will be used to expose voltage, power and energy values for dGfx. Here we set up i915 hwmon infrastructure including i915 hwmon registration, basic data structures and functions. v2: - Create HWMON infra patch (Ashutosh) - Fixed review comments (Jani) - Remove "select HWMON" from i915/Kconfig (Jani) v3: Use hwm_ prefix for static functions (Ashutosh) v4: s/#ifdef CONFIG_HWMON/#if IS_REACHABLE(CONFIG_HWMON)/ since the former doesn't work if hwmon is compiled as a module (Guenter) v5: Fixed review comments (Jani) v6: s/kzalloc/devm_kzalloc/ (Andi) v7: s/hwmon_device_register_with_info/ devm_hwmon_device_register_with_info/ (Ashutosh) Cc: Guenter Roeck Signed-off-by: Dale B Stimson Signed-off-by: Ashutosh Dixit Signed-off-by: Riana Tauro Signed-off-by: Badal Nilawar Acked-by: Guenter Roeck Reviewed-by: Ashutosh Dixit Reviewed-by: Anshuman Gupta Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/Makefile | 3 + drivers/gpu/drm/i915/i915_driver.c | 5 ++ drivers/gpu/drm/i915/i915_drv.h| 2 + drivers/gpu/drm/i915/i915_hwmon.c | 122 + drivers/gpu/drm/i915/i915_hwmon.h | 20 + 5 files changed, 152 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_hwmon.c create mode 100644 drivers/gpu/drm/i915/i915_hwmon.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f8cc1eb52626e..2535593ab379e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -209,6 +209,9 @@ i915-y += gt/uc/intel_uc.o \ # graphics system controller (GSC) support i915-y += gt/intel_gsc.o +# graphics hardware monitoring (HWMON) support +i915-$(CONFIG_HWMON) += i915_hwmon.o + # modesetting core code i915-y += \ display/hsw_ips.o \ diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 24d3d2d85fd57..49868dc51 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -81,6 +81,7 @@ #include "i915_drm_client.h" #include "i915_drv.h" #include "i915_getparam.h" +#include "i915_hwmon.h" #include "i915_ioc32.h" #include "i915_ioctl.h" #include "i915_irq.h" @@ -763,6 +764,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) for_each_gt(gt, dev_priv, i) intel_gt_driver_register(gt); + i915_hwmon_register(dev_priv); + intel_display_driver_register(dev_priv); intel_power_domains_enable(dev_priv); @@ -795,6 +798,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) for_each_gt(gt, dev_priv, i) intel_gt_driver_unregister(gt); + i915_hwmon_unregister(dev_priv); + i915_perf_unregister(dev_priv); i915_pmu_unregister(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 90ed8e6db2fe0..a81372ddd2db7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -349,6 +349,8 @@ struct drm_i915_private { struct i915_perf perf; + struct i915_hwmon *hwmon; + /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct intel_gt gt0; diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c new file mode 100644 index 0..231552fda374a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include +#include +#include + +#include "i915_drv.h" +#include "i915_hwmon.h" +#include "i915_reg.h" +#include "intel_mchbar_regs.h" + +struct hwm_reg { +}; + +struct hwm_drvdata { + struct i915_hwmon *hwmon; + struct intel_uncore *uncore; + struct device *hwmon_dev; + char name[12]; +}; + +struct i915_hwmon { + struct hwm_drvdata ddat; + struct mutex hwmon_lock;/* counter overflow logic and rmw */ + struct hwm_reg rg; +}; + +static const struct hwmon_channel_info *hwm_info[] = { + NULL +}; + +static umode_t +hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + switch (type) { + default: + return 0; + } +} + +static int +hwm_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, +int channel, long *val) +{ + switch (type) { + default: + return -EOPNOTSUPP; + } +} + +static int +hwm_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, + int channel, long val) +{ + switch (type) { + default: + return -EOPNOTSUPP; + } +} + +static const struct hwmon_ops hwm_ops = { + .is_visible = hwm_is_visible, + .read = hwm_read, +
[PATCH 4/7] drm/i915/hwmon: Show device level energy usage
From: Dale B Stimson Use i915 HWMON to display device level energy input. v2: Updated the date and kernel version in feature description v3: - Cleaned up hwm_energy function and removed unused function i915_hwmon_energy_status_get (Ashutosh) v4: KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko) v5: Change contact to intel-gfx (Rodrigo) Change return type of hwm_energy to void (Andi) Signed-off-by: Dale B Stimson Signed-off-by: Ashutosh Dixit Signed-off-by: Riana Tauro Signed-off-by: Badal Nilawar Acked-by: Guenter Roeck Reviewed-by: Ashutosh Dixit Reviewed-by: Anshuman Gupta --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 8 ++ drivers/gpu/drm/i915/i915_hwmon.c | 106 +- drivers/gpu/drm/i915/intel_mchbar_regs.h | 2 + 3 files changed, 114 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon index e0b1af4ec6d84..aa00c558495b3 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -25,3 +25,11 @@ Contact: intel-...@lists.freedesktop.org Description: RO. Card default power limit (default TDP setting). Only supported for particular Intel i915 graphics platforms. + +What: /sys/devices/.../hwmon/hwmon/energy1_input +Date: February 2023 +KernelVersion: 6.2 +Contact: intel-...@lists.freedesktop.org +Description: RO. Energy input of device in microjoules. + + Only supported for particular Intel i915 graphics platforms. diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index db254413a07da..d8d30daa37944 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -17,21 +17,30 @@ * SF_* - scale factors for particular quantities according to hwmon spec. * - voltage - millivolts * - power - microwatts + * - energy - microjoules */ #define SF_VOLTAGE 1000 #define SF_POWER 100 +#define SF_ENERGY 100 struct hwm_reg { i915_reg_t gt_perf_status; i915_reg_t pkg_power_sku_unit; i915_reg_t pkg_power_sku; i915_reg_t pkg_rapl_limit; + i915_reg_t energy_status_all; +}; + +struct hwm_energy_info { + u32 reg_val_prev; + long accum_energy; /* Accumulated energy for energy1_input */ }; struct hwm_drvdata { struct i915_hwmon *hwmon; struct intel_uncore *uncore; struct device *hwmon_dev; + struct hwm_energy_info ei; /* Energy info for energy1_input */ char name[12]; }; @@ -40,6 +49,7 @@ struct i915_hwmon { struct mutex hwmon_lock;/* counter overflow logic and rmw */ struct hwm_reg rg; int scl_shift_power; + int scl_shift_energy; }; static void @@ -98,9 +108,58 @@ hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr, bits_to_clear, bits_to_set); } +/* + * hwm_energy - Obtain energy value + * + * The underlying energy hardware register is 32-bits and is subject to + * overflow. How long before overflow? For example, with an example + * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and + * a power draw of 1000 watts, the 32-bit counter will overflow in + * approximately 4.36 minutes. + * + * Examples: + *1 watt: (2^32 >> 14) /1 W / (60 * 60 * 24) secs/day -> 3 days + * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes + * + * The function significantly increases overflow duration (from 4.36 + * minutes) by accumulating the energy register into a 'long' as allowed by + * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), + * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and + * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before + * energy1_input overflows. This at 1000 W is an overflow duration of 278 years. + */ +static void +hwm_energy(struct hwm_drvdata *ddat, long *energy) +{ + struct intel_uncore *uncore = ddat->uncore; + struct i915_hwmon *hwmon = ddat->hwmon; + struct hwm_energy_info *ei = >ei; + intel_wakeref_t wakeref; + i915_reg_t rgaddr; + u32 reg_val; + + rgaddr = hwmon->rg.energy_status_all; + + mutex_lock(>hwmon_lock); + + with_intel_runtime_pm(uncore->rpm, wakeref) + reg_val = intel_uncore_read(uncore, rgaddr); + + if (reg_val >= ei->reg_val_prev) + ei->accum_energy += reg_val - ei->reg_val_prev; + else + ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; + ei->reg_val_prev = reg_val; + + *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, +
[PATCH 2/7] drm/i915/hwmon: Add HWMON current voltage support
From: Riana Tauro Use i915 HWMON subsystem to display current input voltage. v2: - Updated date and kernel version in feature description - Fixed review comments (Ashutosh) v3: Use macro HWMON_CHANNEL_INFO to define hwmon channel (Guenter) v4: - Fixed review comments (Ashutosh) - Use hwm_ prefix for static functions (Ashutosh) v5: Added unit of voltage as millivolts (Ashutosh) v6: KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko) v7: Change contact to intel-gfx (Rodrigo) GEN12_RPSTAT1 is available for all Gen12+ (Andi) Added Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon to MAINTAINERS Cc: Guenter Roeck Cc: Anshuman Gupta Signed-off-by: Riana Tauro Signed-off-by: Badal Nilawar Signed-off-by: Ashutosh Dixit Acked-by: Guenter Roeck Reviewed-by: Ashutosh Dixit Reviewed-by: Anshuman Gupta --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 7 +++ MAINTAINERS | 1 + drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 ++ drivers/gpu/drm/i915/i915_hwmon.c | 53 +++ 4 files changed, 64 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon new file mode 100644 index 0..5f4b136f08509 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -0,0 +1,7 @@ +What: /sys/devices/.../hwmon/hwmon/in0_input +Date: February 2023 +KernelVersion: 6.2 +Contact: intel-...@lists.freedesktop.org +Description: RO. Current Voltage in millivolt. + + Only supported for particular Intel i915 graphics platforms. diff --git a/MAINTAINERS b/MAINTAINERS index f07a8bf8744f9..7d57ede980940 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10144,6 +10144,7 @@ Q: http://patchwork.freedesktop.org/project/intel-gfx/ B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs C: irc://irc.oftc.net/intel-gfx T: git git://anongit.freedesktop.org/drm-intel +F: Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon F: Documentation/gpu/i915.rst F: drivers/gpu/drm/i915/ F: include/drm/i915* diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 7f79bbf978284..fcf5f9012852f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1519,6 +1519,9 @@ #define VLV_RENDER_C0_COUNT_MMIO(0x138118) #define VLV_MEDIA_C0_COUNT _MMIO(0x13811c) +#define GEN12_RPSTAT1 _MMIO(0x1381b4) +#define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) + #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) #define GEN11_GUNIT (28) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 231552fda374a..025399391ddcc 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -11,8 +11,16 @@ #include "i915_hwmon.h" #include "i915_reg.h" #include "intel_mchbar_regs.h" +#include "gt/intel_gt_regs.h" + +/* + * SF_* - scale factors for particular quantities according to hwmon spec. + * - voltage - millivolts + */ +#define SF_VOLTAGE 1000 struct hwm_reg { + i915_reg_t gt_perf_status; }; struct hwm_drvdata { @@ -29,14 +37,51 @@ struct i915_hwmon { }; static const struct hwmon_channel_info *hwm_info[] = { + HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), NULL }; +static umode_t +hwm_in_is_visible(const struct hwm_drvdata *ddat, u32 attr) +{ + struct drm_i915_private *i915 = ddat->uncore->i915; + + switch (attr) { + case hwmon_in_input: + return IS_DG1(i915) || IS_DG2(i915) ? 0444 : 0; + default: + return 0; + } +} + +static int +hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val) +{ + struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; + u32 reg_value; + + switch (attr) { + case hwmon_in_input: + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + reg_value = intel_uncore_read(ddat->uncore, hwmon->rg.gt_perf_status); + /* HW register value in units of 2.5 millivolt */ + *val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, reg_value) * 25, 10); + return 0; + default: + return -EOPNOTSUPP; + } +} + static umode_t hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) { + struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata; + switch (type) { + case hwmon_in: +
[PATCH 6/7] drm/i915/hwmon: Expose power1_max_interval
Expose power1_max_interval, that is the tau corresponding to PL1, as a custom hwmon attribute. Some bit manipulation is needed because of the format of PKG_PWR_LIM_1_TIME in GT0_PACKAGE_RAPL_LIMIT register (1.x * power(2,y)). v2: Update date and kernel version in Documentation (Badal) v3: Cleaned up hwm_power1_max_interval_store() (Badal) v4: - Fixed review comments (Anshuman) - In hwm_power1_max_interval_store() get PKG_MAX_WIN from pkg_power_sku when it is valid (Ashutosh) - KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko) v5: On some of the DGFX setups it is seen that although pkg_power_sku is valid the field PKG_WIN_MAX is not populated. So it is decided to stick to default value of PKG_WIN_MAX (Ashutosh) v6: Change contact to intel-gfx (Rodrigo) Fixed variable types in hwm_power1_max_interval_store (Andi) Documented PKG_MAX_WIN_DEFAULT (Andi) Removed else in hwm_attributes_visible (Andi) Signed-off-by: Ashutosh Dixit Signed-off-by: Badal Nilawar Acked-by: Guenter Roeck Reviewed-by: Anshuman Gupta --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 9 ++ drivers/gpu/drm/i915/i915_hwmon.c | 119 +- drivers/gpu/drm/i915/intel_mchbar_regs.h | 7 ++ 3 files changed, 134 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon index a7a6512fcc8ca..9dc5ff14107bb 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -26,6 +26,15 @@ Description: RO. Card default power limit (default TDP setting). Only supported for particular Intel i915 graphics platforms. +What: /sys/devices/.../hwmon/hwmon/power1_max_interval +Date: February 2023 +KernelVersion: 6.2 +Contact: intel-...@lists.freedesktop.org +Description: RW. Sustained power limit interval (Tau in PL1/Tau) in + milliseconds over which sustained power is averaged. + + Only supported for particular Intel i915 graphics platforms. + What: /sys/devices/.../hwmon/hwmon/power1_crit Date: February 2023 KernelVersion: 6.2 diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 2b24a7a711400..58f80380e5427 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -20,11 +20,13 @@ * - power - microwatts * - curr - milliamperes * - energy - microjoules + * - time - milliseconds */ #define SF_VOLTAGE 1000 #define SF_POWER 100 #define SF_CURR1000 #define SF_ENERGY 100 +#define SF_TIME1000 struct hwm_reg { i915_reg_t gt_perf_status; @@ -53,6 +55,7 @@ struct i915_hwmon { struct hwm_reg rg; int scl_shift_power; int scl_shift_energy; + int scl_shift_time; }; static void @@ -159,6 +162,119 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy) mutex_unlock(>hwmon_lock); } +static ssize_t +hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr, +char *buf) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + struct i915_hwmon *hwmon = ddat->hwmon; + intel_wakeref_t wakeref; + u32 r, x, y, x_w = 2; /* 2 bits */ + u64 tau4, out; + + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) + r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); + + x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); + y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); + /* +* tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) +* = (4 | x) << (y - 2) +* where (y - 2) ensures a 1.x fixed point representation of 1.x +* However because y can be < 2, we compute +* tau4 = (4 | x) << y +* but add 2 when doing the final right shift to account for units +*/ + tau4 = ((1 << x_w) | x) << y; + /* val in hwmon interface units (millisec) */ + out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); + + return sysfs_emit(buf, "%llu\n", out); +} + +static ssize_t +hwm_power1_max_interval_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hwm_drvdata *ddat = dev_get_drvdata(dev); + struct i915_hwmon *hwmon = ddat->hwmon; + u32 x, y, rxy, x_w = 2; /* 2 bits */ + u64 tau4, r, max_win; + unsigned long val; + int ret; + + ret = kstrtoul(buf, 0, ); + if (ret) + return ret; + + /* +* Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12 +* The hwmon->scl_shift_time default of 0xa results in a max tau