[PATCH] drm/i915/hwmon: Get rid of devm

2024-04-17 Thread Ashutosh Dixit
When both hwmon and hwmon drvdata (on which hwmon depends) are device
managed resources, the expectation, on device unbind, is that hwmon will be
released before drvdata. However, in i915 there are two separate code
paths, which both release either drvdata or hwmon and either can be
released before the other. These code paths (for device unbind) are as
follows (see also the bug referenced below):

Call Trace:
release_nodes+0x11/0x70
devres_release_group+0xb2/0x110
component_unbind_all+0x8d/0xa0
component_del+0xa5/0x140
intel_pxp_tee_component_fini+0x29/0x40 [i915]
intel_pxp_fini+0x33/0x80 [i915]
i915_driver_remove+0x4c/0x120 [i915]
i915_pci_remove+0x19/0x30 [i915]
pci_device_remove+0x32/0xa0
device_release_driver_internal+0x19c/0x200
unbind_store+0x9c/0xb0

and

Call Trace:
release_nodes+0x11/0x70
devres_release_all+0x8a/0xc0
device_unbind_cleanup+0x9/0x70
device_release_driver_internal+0x1c1/0x200
unbind_store+0x9c/0xb0

This means that in i915, if we use devm, we cannot guarantee that hwmon will
always be released before drvdata. As a result, we have a use-after-free (uaf)
if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't.

The only way out of this seems to be to get rid of devm_ and release/free
everything explicitly during device unbind.

v2: Change commit message and other minor code changes
v3: Cleanup from i915_hwmon_register on error (Armin Wolf)
v4: Eliminate potential static analyzer warning (Rodrigo)
Eliminate fetch_and_zero (Jani)
v5: Restore previous logic for ddat_gt->hwmon_dev error return (Andi)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366
Reviewed-by: Rodrigo Vivi 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 46 +--
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index b758fd110c20..c0662a022f59 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!IS_DGFX(i915))
return;
 
-   hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+   hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
return;
 
@@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915)
hwm_get_preregistration_info(i915);
 
/*  hwmon_dev points to device hwmon */
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
-ddat,
-_chip_info,
-hwm_groups);
-   if (IS_ERR(hwmon_dev)) {
-   i915->hwmon = NULL;
-   return;
-   }
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat->name,
+   ddat,
+   _chip_info,
+   hwm_groups);
+   if (IS_ERR(hwmon_dev))
+   goto err;
 
ddat->hwmon_dev = hwmon_dev;
 
@@ -839,16 +837,36 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, 
hwmon_energy_input, 0))
continue;
 
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, 
ddat_gt->name,
-ddat_gt,
-
_gt_chip_info,
-NULL);
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name,
+   ddat_gt,
+   _gt_chip_info,
+   NULL);
if (!IS_ERR(hwmon_dev))
ddat_gt->hwmon_dev = hwmon_dev;
}
+   return;
+err:
+   i915_hwmon_unregister(i915);
 }
 
 void i915_hwmon_unregister(struct drm_i915_private *i915)
 {
-   fetch_and_zero(>hwmon);
+   struct i915_hwmon *hwmon = i915->hwmon;
+   struct intel_gt *gt;
+   int i;
+
+   if (!hwmon)
+   return;
+
+   for_each_gt(gt, i915, i)
+   if (hwmon->ddat_gt[i].hwmon_dev)
+   hwmon_device_unregister(hwmon->ddat_gt[i].hwmon_dev);
+
+   if (hwmon->ddat.hwmon_dev)
+   hwmon_device_unregister(hwmon->ddat.hwmon_dev);
+
+   mutex_destroy(>hwmon_lock);
+
+   kfree(i915->hwmon);
+   i915->hwmon = NULL;
 }
-- 
2.41.0



[PATCH v4] drm/i915/hwmon: Get rid of devm

2024-04-16 Thread Ashutosh Dixit
When both hwmon and hwmon drvdata (on which hwmon depends) are device
managed resources, the expectation, on device unbind, is that hwmon will be
released before drvdata. However, in i915 there are two separate code
paths, which both release either drvdata or hwmon and either can be
released before the other. These code paths (for device unbind) are as
follows (see also the bug referenced below):

Call Trace:
release_nodes+0x11/0x70
devres_release_group+0xb2/0x110
component_unbind_all+0x8d/0xa0
component_del+0xa5/0x140
intel_pxp_tee_component_fini+0x29/0x40 [i915]
intel_pxp_fini+0x33/0x80 [i915]
i915_driver_remove+0x4c/0x120 [i915]
i915_pci_remove+0x19/0x30 [i915]
pci_device_remove+0x32/0xa0
device_release_driver_internal+0x19c/0x200
unbind_store+0x9c/0xb0

and

Call Trace:
release_nodes+0x11/0x70
devres_release_all+0x8a/0xc0
device_unbind_cleanup+0x9/0x70
device_release_driver_internal+0x1c1/0x200
unbind_store+0x9c/0xb0

This means that in i915, if we use devm, we cannot guarantee that hwmon will
always be released before drvdata. As a result, we have a use-after-free (uaf)
if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't.

The only way out of this seems to be to get rid of devm_ and release/free
everything explicitly during device unbind.

v2: Change commit message and other minor code changes
v3: Cleanup from i915_hwmon_register on error (Armin Wolf)
v4: Eliminate potential static analyzer warning (Rodrigo)
Eliminate fetch_and_zero (Jani)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366
Reviewed-by: Rodrigo Vivi 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 52 +--
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index b758fd110c20..1551a40a675e 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!IS_DGFX(i915))
return;
 
-   hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+   hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
return;
 
@@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915)
hwm_get_preregistration_info(i915);
 
/*  hwmon_dev points to device hwmon */
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
-ddat,
-_chip_info,
-hwm_groups);
-   if (IS_ERR(hwmon_dev)) {
-   i915->hwmon = NULL;
-   return;
-   }
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat->name,
+   ddat,
+   _chip_info,
+   hwm_groups);
+   if (IS_ERR(hwmon_dev))
+   goto err;
 
ddat->hwmon_dev = hwmon_dev;
 
@@ -839,16 +837,38 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, 
hwmon_energy_input, 0))
continue;
 
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, 
ddat_gt->name,
-ddat_gt,
-
_gt_chip_info,
-NULL);
-   if (!IS_ERR(hwmon_dev))
-   ddat_gt->hwmon_dev = hwmon_dev;
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name,
+   ddat_gt,
+   _gt_chip_info,
+   NULL);
+   if (IS_ERR(hwmon_dev))
+   goto err;
+
+   ddat_gt->hwmon_dev = hwmon_dev;
}
+   return;
+err:
+   i915_hwmon_unregister(i915);
 }
 
 void i915_hwmon_unregister(struct drm_i915_private *i915)
 {
-   fetch_and_zero(>hwmon);
+   struct i915_hwmon *hwmon = i915->hwmon;
+   struct intel_gt *gt;
+   int i;
+
+   if (!hwmon)
+   return;
+
+   for_each_gt(gt, i915, i)
+   if (hwmon->ddat_gt[i].hwmon_dev)
+   hwmon_device_unregister(hwmon->ddat_gt[i].hwmon_dev);
+
+   if (hwmon->ddat.hwmon_dev)
+   hwmon_device_unregister(hwmon->ddat.hwmon_dev);
+
+   mutex_destroy(>hwmon_lock);
+
+   kfree(i915->hwmon);
+   i915->hwmon = NULL;
 }
-- 
2.41.0



[PATCH v3] drm/i915/hwmon: Get rid of devm

2024-04-15 Thread Ashutosh Dixit
When both hwmon and hwmon drvdata (on which hwmon depends) are device
managed resources, the expectation, on device unbind, is that hwmon will be
released before drvdata. However, in i915 there are two separate code
paths, which both release either drvdata or hwmon and either can be
released before the other. These code paths (for device unbind) are as
follows (see also the bug referenced below):

Call Trace:
release_nodes+0x11/0x70
devres_release_group+0xb2/0x110
component_unbind_all+0x8d/0xa0
component_del+0xa5/0x140
intel_pxp_tee_component_fini+0x29/0x40 [i915]
intel_pxp_fini+0x33/0x80 [i915]
i915_driver_remove+0x4c/0x120 [i915]
i915_pci_remove+0x19/0x30 [i915]
pci_device_remove+0x32/0xa0
device_release_driver_internal+0x19c/0x200
unbind_store+0x9c/0xb0

and

Call Trace:
release_nodes+0x11/0x70
devres_release_all+0x8a/0xc0
device_unbind_cleanup+0x9/0x70
device_release_driver_internal+0x1c1/0x200
unbind_store+0x9c/0xb0

This means that in i915, if we use devm, we cannot guarantee that hwmon will
always be released before drvdata. As a result, we have a use-after-free (uaf)
if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't.

The only way out of this seems to be to get rid of devm_ and release/free
everything explicitly during device unbind.

v2: Change commit message and other minor code changes
v3: Cleanup from i915_hwmon_register on error (Armin Wolf)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 54 ++-
 1 file changed, 38 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index b758fd110c20..8cebf6f5b101 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!IS_DGFX(i915))
return;
 
-   hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+   hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
return;
 
@@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915)
hwm_get_preregistration_info(i915);
 
/*  hwmon_dev points to device hwmon */
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
-ddat,
-_chip_info,
-hwm_groups);
-   if (IS_ERR(hwmon_dev)) {
-   i915->hwmon = NULL;
-   return;
-   }
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat->name,
+   ddat,
+   _chip_info,
+   hwm_groups);
+   if (IS_ERR(hwmon_dev))
+   goto err;
 
ddat->hwmon_dev = hwmon_dev;
 
@@ -839,16 +837,40 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, 
hwmon_energy_input, 0))
continue;
 
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, 
ddat_gt->name,
-ddat_gt,
-
_gt_chip_info,
-NULL);
-   if (!IS_ERR(hwmon_dev))
-   ddat_gt->hwmon_dev = hwmon_dev;
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name,
+   ddat_gt,
+   _gt_chip_info,
+   NULL);
+   if (IS_ERR(hwmon_dev))
+   goto err;
+
+   ddat_gt->hwmon_dev = hwmon_dev;
}
+   return;
+err:
+   i915_hwmon_unregister(i915);
 }
 
 void i915_hwmon_unregister(struct drm_i915_private *i915)
 {
-   fetch_and_zero(>hwmon);
+   struct i915_hwmon *hwmon = fetch_and_zero(>hwmon);
+   struct hwm_drvdata *ddat = >ddat;
+   struct intel_gt *gt;
+   int i;
+
+   if (!hwmon)
+   return;
+
+   for_each_gt(gt, i915, i) {
+   struct hwm_drvdata *ddat_gt = hwmon->ddat_gt + i;
+
+   if (ddat_gt->hwmon_dev)
+   hwmon_device_unregister(ddat_gt->hwmon_dev);
+   }
+
+   if (ddat->hwmon_dev)
+   hwmon_device_unregister(ddat->hwmon_dev);
+
+   mutex_destroy(>hwmon_lock);
+   kfree(hwmon);
 }
-- 
2.41.0



[PATCH] drm/i915/hwmon: Get rid of devm

2024-04-15 Thread Ashutosh Dixit
When both hwmon and hwmon drvdata (on which hwmon depends) are device
managed resources, the expectation, on device unbind, is that hwmon will be
released before drvdata. However, in i915 there are two separate code
paths, which both release either drvdata or hwmon and either can be
released before the other. These code paths (for device unbind) are as
follows (see also the bug referenced below):

Call Trace:
release_nodes+0x11/0x70
devres_release_group+0xb2/0x110
component_unbind_all+0x8d/0xa0
component_del+0xa5/0x140
intel_pxp_tee_component_fini+0x29/0x40 [i915]
intel_pxp_fini+0x33/0x80 [i915]
i915_driver_remove+0x4c/0x120 [i915]
i915_pci_remove+0x19/0x30 [i915]
pci_device_remove+0x32/0xa0
device_release_driver_internal+0x19c/0x200
unbind_store+0x9c/0xb0

and

Call Trace:
release_nodes+0x11/0x70
devres_release_all+0x8a/0xc0
device_unbind_cleanup+0x9/0x70
device_release_driver_internal+0x1c1/0x200
unbind_store+0x9c/0xb0

This means that in i915, if we use devm, we cannot guarantee that hwmon will
always be released before drvdata. As a result, we have a use-after-free (uaf)
if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't.

The only way out of this seems to be to get rid of devm_ and release/free
everything explicitly during device unbind.

v2: Change commit message and other minor code changes
v3: Cleanup from i915_hwmon_register on error (Armin Wolf)

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 54 ++-
 1 file changed, 38 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index b758fd110c20..8cebf6f5b101 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -793,7 +793,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!IS_DGFX(i915))
return;
 
-   hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+   hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
return;
 
@@ -819,14 +819,12 @@ void i915_hwmon_register(struct drm_i915_private *i915)
hwm_get_preregistration_info(i915);
 
/*  hwmon_dev points to device hwmon */
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
-ddat,
-_chip_info,
-hwm_groups);
-   if (IS_ERR(hwmon_dev)) {
-   i915->hwmon = NULL;
-   return;
-   }
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat->name,
+   ddat,
+   _chip_info,
+   hwm_groups);
+   if (IS_ERR(hwmon_dev))
+   goto err;
 
ddat->hwmon_dev = hwmon_dev;
 
@@ -839,16 +837,40 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, 
hwmon_energy_input, 0))
continue;
 
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, 
ddat_gt->name,
-ddat_gt,
-
_gt_chip_info,
-NULL);
-   if (!IS_ERR(hwmon_dev))
-   ddat_gt->hwmon_dev = hwmon_dev;
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name,
+   ddat_gt,
+   _gt_chip_info,
+   NULL);
+   if (IS_ERR(hwmon_dev))
+   goto err;
+
+   ddat_gt->hwmon_dev = hwmon_dev;
}
+   return;
+err:
+   i915_hwmon_unregister(i915);
 }
 
 void i915_hwmon_unregister(struct drm_i915_private *i915)
 {
-   fetch_and_zero(>hwmon);
+   struct i915_hwmon *hwmon = fetch_and_zero(>hwmon);
+   struct hwm_drvdata *ddat = >ddat;
+   struct intel_gt *gt;
+   int i;
+
+   if (!hwmon)
+   return;
+
+   for_each_gt(gt, i915, i) {
+   struct hwm_drvdata *ddat_gt = hwmon->ddat_gt + i;
+
+   if (ddat_gt->hwmon_dev)
+   hwmon_device_unregister(ddat_gt->hwmon_dev);
+   }
+
+   if (ddat->hwmon_dev)
+   hwmon_device_unregister(ddat->hwmon_dev);
+
+   mutex_destroy(>hwmon_lock);
+   kfree(hwmon);
 }
-- 
2.41.0



[PATCH v2] drm/i915/hwmon: Get rid of devm

2024-04-15 Thread Ashutosh Dixit
When both hwmon and hwmon drvdata (on which hwmon depends) are device
managed resources, the expectation, on device unbind, is that hwmon will be
released before drvdata. However, in i915 there are two separate code
paths, which both release either drvdata or hwmon and either can be
released before the other. These code paths (for device unbind) are as
follows (see also the bug referenced below):

Call Trace:
release_nodes+0x11/0x70
devres_release_group+0xb2/0x110
component_unbind_all+0x8d/0xa0
component_del+0xa5/0x140
intel_pxp_tee_component_fini+0x29/0x40 [i915]
intel_pxp_fini+0x33/0x80 [i915]
i915_driver_remove+0x4c/0x120 [i915]
i915_pci_remove+0x19/0x30 [i915]
pci_device_remove+0x32/0xa0
device_release_driver_internal+0x19c/0x200
unbind_store+0x9c/0xb0

and

Call Trace:
release_nodes+0x11/0x70
devres_release_all+0x8a/0xc0
device_unbind_cleanup+0x9/0x70
device_release_driver_internal+0x1c1/0x200
unbind_store+0x9c/0xb0

This means that in i915, if we use devm, we cannot guarantee that hwmon will
always be released before drvdata. As a result, we have a use-after-free (uaf)
if hwmon sysfs is accessed when drvdata has been released but hwmon hasn't.

The only way out of this seems to be to get rid of devm_ and release/free
everything explicitly during device unbind.

v2: Change commit message and other minor code changes

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10366
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 41 +++
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 8c3f443c8347..46c24b1ee6df 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -792,7 +792,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!IS_DGFX(i915))
return;
 
-   hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+   hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
return;
 
@@ -818,10 +818,10 @@ void i915_hwmon_register(struct drm_i915_private *i915)
hwm_get_preregistration_info(i915);
 
/*  hwmon_dev points to device hwmon */
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
-ddat,
-_chip_info,
-hwm_groups);
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat->name,
+   ddat,
+   _chip_info,
+   hwm_groups);
if (IS_ERR(hwmon_dev)) {
i915->hwmon = NULL;
return;
@@ -838,10 +838,10 @@ void i915_hwmon_register(struct drm_i915_private *i915)
if (!hwm_gt_is_visible(ddat_gt, hwmon_energy, 
hwmon_energy_input, 0))
continue;
 
-   hwmon_dev = devm_hwmon_device_register_with_info(dev, 
ddat_gt->name,
-ddat_gt,
-
_gt_chip_info,
-NULL);
+   hwmon_dev = hwmon_device_register_with_info(dev, ddat_gt->name,
+   ddat_gt,
+   _gt_chip_info,
+   NULL);
if (!IS_ERR(hwmon_dev))
ddat_gt->hwmon_dev = hwmon_dev;
}
@@ -849,5 +849,26 @@ void i915_hwmon_register(struct drm_i915_private *i915)
 
 void i915_hwmon_unregister(struct drm_i915_private *i915)
 {
-   fetch_and_zero(>hwmon);
+   struct i915_hwmon *hwmon = fetch_and_zero(>hwmon);
+   struct hwm_drvdata *ddat = >ddat;
+   struct intel_gt *gt;
+   int i;
+
+   if (!hwmon)
+   return;
+
+   for_each_gt(gt, i915, i) {
+   struct hwm_drvdata *ddat_gt = hwmon->ddat_gt + i;
+
+   if (ddat_gt->hwmon_dev) {
+   hwmon_device_unregister(ddat_gt->hwmon_dev);
+   ddat_gt->hwmon_dev = NULL;
+   }
+   }
+
+   if (ddat->hwmon_dev)
+   hwmon_device_unregister(ddat->hwmon_dev);
+
+   mutex_destroy(>hwmon_lock);
+   kfree(hwmon);
 }
-- 
2.41.0



[PATCH] drm/i915/perf: Remove gtt_offset from stream->oa_buffer.head/.tail

2023-09-19 Thread Ashutosh Dixit
There is no reason to add gtt_offset to the cached head/tail pointers
stream->oa_buffer.head and stream->oa_buffer.tail. This causes the code to
constantly add gtt_offset and subtract gtt_offset and is error
prone.

It is much simpler to maintain stream->oa_buffer.head and
stream->oa_buffer.tail without adding gtt_offset to them and just allow for
the gtt_offset when reading/writing from/to HW registers.

v2: Minor tweak to commit message due to dropping patch in previous series

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c | 52 
 1 file changed, 13 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 018f42fff4cc0..1347e4ec9dd5a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -543,10 +543,9 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
 {
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format->size;
-   u32 head, tail, read_tail;
+   u32 tail, hw_tail;
unsigned long flags;
bool pollin;
-   u32 hw_tail;
u32 partial_report_size;
 
/* We have to consider the (unlikely) possibility that read() errors
@@ -556,6 +555,7 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
spin_lock_irqsave(>oa_buffer.ptr_lock, flags);
 
hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
+   hw_tail -= gtt_offset;
 
/* The tail pointer increases in 64 byte increments, not in report_size
 * steps. Also the report size may not be a power of 2. Compute
@@ -567,13 +567,6 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
/* Subtract partial amount off the tail */
hw_tail = OA_TAKEN(hw_tail, partial_report_size);
 
-   /* NB: The head we observe here might effectively be a little
-* out of date. If a read() is in progress, the head could be
-* anywhere between this head and stream->oa_buffer.tail.
-*/
-   head = stream->oa_buffer.head - gtt_offset;
-   read_tail = stream->oa_buffer.tail - gtt_offset;
-
tail = hw_tail;
 
/* Walk the stream backward until we find a report with report
@@ -587,7 +580,7 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
 * memory in the order they were written to.
 * If not : (╯°□°)╯︵ ┻━┻
 */
-   while (OA_TAKEN(tail, read_tail) >= report_size) {
+   while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) {
void *report = stream->oa_buffer.vaddr + tail;
 
if (oa_report_id(stream, report) ||
@@ -601,9 +594,9 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
__ratelimit(>perf->tail_pointer_race))
drm_notice(>uncore->i915->drm,
   "unlanded report(s) head=0x%x tail=0x%x 
hw_tail=0x%x\n",
-head, tail, hw_tail);
+stream->oa_buffer.head, tail, hw_tail);
 
-   stream->oa_buffer.tail = gtt_offset + tail;
+   stream->oa_buffer.tail = tail;
 
pollin = OA_TAKEN(stream->oa_buffer.tail,
  stream->oa_buffer.head) >= report_size;
@@ -753,13 +746,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
 
spin_unlock_irqrestore(>oa_buffer.ptr_lock, flags);
 
-   /*
-* NB: oa_buffer.head/tail include the gtt_offset which we don't want
-* while indexing relative to oa_buf_base.
-*/
-   head -= gtt_offset;
-   tail -= gtt_offset;
-
/*
 * An out of bounds or misaligned head or tail pointer implies a driver
 * bug since we validate + align the tail pointers we read from the
@@ -895,9 +881,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
 * We removed the gtt_offset for the copy loop above, indexing
 * relative to oa_buf_base so put back here...
 */
-   head += gtt_offset;
intel_uncore_write(uncore, oaheadptr,
-  head & GEN12_OAG_OAHEADPTR_MASK);
+  (head + gtt_offset) & 
GEN12_OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = head;
 
spin_unlock_irqrestore(>oa_buffer.ptr_lock, flags);
@@ -1042,12 +1027,6 @@ static int gen7_append_oa_reports(struct 
i915_perf_stream *stream,
 
spin_unlock_irqrestore(>oa_buffer.ptr_lock, flags);
 
-   /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
-* while indexing relative to oa_buf_base.
-*/
-   head -= gtt_offset;
-   tail -= gtt_offset;

[PATCH] dim: Disallow remote branch deletions with 'dim push'

2023-06-01 Thread Ashutosh Dixit
An inadvertent 'dim push -d' can delete remote branches. Disallow such
remote branch deletions.

Signed-off-by: Ashutosh Dixit 
---
 dim | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/dim b/dim
index 126568e..e5899e6 100755
--- a/dim
+++ b/dim
@@ -1029,6 +1029,12 @@ function dim_push_branch
fi
fi
 
+   # Disallow remote branch deletions, say with 'dim push -d'
+   if [[ "$@" == *"-d"* ]]; then
+   echoerr "Attempt to delete remote branch, aborting."
+   return 1
+   fi
+
git_push $remote $branch "$@"
 
update_linux_next $branch drm-intel-next drm-intel-next-fixes 
drm-intel-fixes
-- 
2.38.0



[PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-24 Thread Ashutosh Dixit
No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.

v2: Retain read/store helpers (Tvrtko)

Suggested-by: Tvrtko Ursulin 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 16 +++-
 drivers/gpu/drm/i915/i915_pmu.h |  2 +-
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 09313cf9316b4..f96fe92dca4e4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -191,31 +191,21 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-   return idx;
-}
-
 static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
 {
-   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+   return pmu->sample[gt_id][sample].cur;
 }
 
 static void
 store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
 {
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+   pmu->sample[gt_id][sample].cur = val;
 }
 
 static void
 add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, 
u32 mul)
 {
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
mul);
+   pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
 }
 
 static u64 get_rc6(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 33d80fbaab8bc..d20592e7db999 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -127,7 +127,7 @@ struct i915_pmu {
 * Only global counters are held here, while the per-engine ones are in
 * struct intel_engine_cs.
 */
-   struct i915_pmu_sample sample[I915_PMU_MAX_GTS * 
__I915_NUM_PMU_SAMPLERS];
+   struct i915_pmu_sample 
sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
/**
 * @sleep_last: Last time GT parked for RC6 estimation.
 */
-- 
2.38.0



[PATCH 1/2] drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked

2023-05-24 Thread Ashutosh Dixit
pmu_needs_timer() keeps the timer running even when GT is parked,
ostensibly to sample requested/actual frequencies. However
frequency_sample() has the following:

/* Report 0/0 (actual/requested) frequency while parked. */
if (!intel_gt_pm_get_if_awake(gt))
return;

The above code prevents frequencies from being sampled while the GT is
parked. So we might as well turn off the sampling timer itself in this
case and save CPU cycles/power.

v2: Instead of turning freq bits off, return false, since no counters will
run after this change when GT is parked (Tvrtko)
v3: Remove gpu_active argument of pmu_needs_timer (Andrzej)

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 16 +---
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a814583e19fd7..09313cf9316b4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -139,7 +139,7 @@ static u32 frequency_enabled_mask(void)
return mask;
 }
 
-static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
+static bool pmu_needs_timer(struct i915_pmu *pmu)
 {
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
u32 enable;
@@ -157,17 +157,11 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
 */
enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
 
-   /*
-* When the GPU is idle per-engine counters do not need to be
-* running so clear those bits out.
-*/
-   if (!gpu_active)
-   enable &= ~ENGINE_SAMPLE_MASK;
/*
 * Also there is software busyness tracking available we do not
 * need the timer for I915_SAMPLE_BUSY counter.
 */
-   else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
+   if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
enable &= ~BIT(I915_SAMPLE_BUSY);
 
/*
@@ -295,7 +289,7 @@ static void park_rc6(struct intel_gt *gt)
 
 static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 {
-   if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) {
+   if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
pmu->timer_enabled = true;
pmu->timer_last = ktime_get();
hrtimer_start_range_ns(>timer,
@@ -321,7 +315,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt)
 */
pmu->unparked &= ~BIT(gt->info.id);
if (pmu->unparked == 0)
-   pmu->timer_enabled = pmu_needs_timer(pmu, false);
+   pmu->timer_enabled = false;
 
spin_unlock_irq(>lock);
 }
@@ -827,7 +821,7 @@ static void i915_pmu_disable(struct perf_event *event)
 */
if (--pmu->enable_count[bit] == 0) {
pmu->enable &= ~BIT(bit);
-   pmu->timer_enabled &= pmu_needs_timer(pmu, true);
+   pmu->timer_enabled &= pmu_needs_timer(pmu);
}
 
spin_unlock_irqrestore(>lock, flags);
-- 
2.38.0



[PATCH v2 0/2] drm/i915/pmu: couple of cleanups

2023-05-24 Thread Ashutosh Dixit
Cc: Andrzej Hajda 
Cc: Tvrtko Ursulin 

Signed-off-by: Ashutosh Dixit 

Ashutosh Dixit (2):
  drm/i915/pmu: Turn off the timer to sample frequencies when GT is
parked
  drm/i915/pmu: Make PMU sample array two-dimensional

 drivers/gpu/drm/i915/i915_pmu.c | 32 
 drivers/gpu/drm/i915/i915_pmu.h |  2 +-
 2 files changed, 9 insertions(+), 25 deletions(-)

-- 
2.38.0



[PATCH] drm/i915/perf: Clear out entire reports after reading if not power of 2 size

2023-05-23 Thread Ashutosh Dixit
Clearing out report id and timestamp as means to detect unlanded reports
only works if report size is power of 2. That is, only when report size is
a sub-multiple of the OA buffer size can we be certain that reports will
land at the same place each time in the OA buffer (after rewind). If report
size is not a power of 2, we need to zero out the entire report to be able
to detect unlanded reports reliably.

v2: Add Fixes tag (Umesh)

Fixes: 1cc064dce4ed ("drm/i915/perf: Add support for OA media units")
Reviewed-by: Umesh Nerlige Ramappa 
Reviewed-by: Lionel Landwerlin 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_perf.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 19d5652300eeb..58284156428dc 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
stream->oa_buffer.last_ctx_id = ctx_id;
}
 
-   /*
-* Clear out the report id and timestamp as a means to detect 
unlanded
-* reports.
-*/
-   oa_report_id_clear(stream, report32);
-   oa_timestamp_clear(stream, report32);
+   if (is_power_of_2(report_size)) {
+   /*
+* Clear out the report id and timestamp as a means
+* to detect unlanded reports.
+*/
+   oa_report_id_clear(stream, report32);
+   oa_timestamp_clear(stream, report32);
+   } else {
+   /* Zero out the entire report */
+   memset(report32, 0, report_size);
+   }
}
 
if (start_offset != *offset) {
-- 
2.38.0



[PATCH 1/2] drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked

2023-05-23 Thread Ashutosh Dixit
pmu_needs_timer() keeps the timer running even when GT is parked,
ostensibly to sample requested/actual frequencies. However
frequency_sample() has the following:

/* Report 0/0 (actual/requested) frequency while parked. */
if (!intel_gt_pm_get_if_awake(gt))
return;

The above code prevents frequencies from being sampled while the GT is
parked. So we might as well turn off the sampling timer itself in this
case and save CPU cycles/power.

v2: Instead of turning freq bits off, return false, since no counters will
run after this change when GT is parked (Tvrtko)

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a814583e19fd7..b47d890d4ada1 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -144,6 +144,10 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
u32 enable;
 
+   /* When GPU is idle, at present no counters need to run */
+   if (!gpu_active)
+   return false;
+
/*
 * Only some counters need the sampling timer.
 *
@@ -157,17 +161,11 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
 */
enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
 
-   /*
-* When the GPU is idle per-engine counters do not need to be
-* running so clear those bits out.
-*/
-   if (!gpu_active)
-   enable &= ~ENGINE_SAMPLE_MASK;
/*
 * Also there is software busyness tracking available we do not
 * need the timer for I915_SAMPLE_BUSY counter.
 */
-   else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
+   if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
enable &= ~BIT(I915_SAMPLE_BUSY);
 
/*
-- 
2.38.0



[PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

2023-05-23 Thread Ashutosh Dixit
No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.

Suggested-by: Tvrtko Ursulin 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 60 ++---
 drivers/gpu/drm/i915/i915_pmu.h |  2 +-
 2 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b47d890d4ada1..137e0df9573ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-   return idx;
-}
-
-static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
-}
-
-static void
-store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
-}
-
-static void
-add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, 
u32 mul)
-{
-   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, 
mul);
-}
-
 static u64 get_rc6(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
@@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(&pmu->lock, flags);
 
if (awake) {
-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
} else {
/*
 * We think we are runtime suspended.
@@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
 * counter value.
 */
val = ktime_since_raw(pmu->sleep_last[gt_id]);
-   val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
+   val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
}
 
-   if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
-   val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
+   if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+   val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
else
-   store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
+   pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
 
spin_unlock_irqrestore(&pmu->lock, flags);
 
@@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
u64 val = __get_rc6(gt);
 
-   store_sample(pmu, i, __I915_SAMPLE_RC6, val);
-   store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
-val);
+   pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
+   pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = 
val;
pmu->sleep_last[i] = ktime_get_raw();
}
}
@@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
 {
struct i915_pmu *pmu = &gt->i915->pmu;
 
-   store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
+   pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
pmu->sleep_last[gt->info.id] = ktime_get_raw();
 }
 
@@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
}
 }
 
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+   sample->cur += mul_u32_u32(val, mul);
+}
+
 static bool
 frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
 {
@@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
if (!val)
val = intel_gpu_freq(rps, rps->cur_freq);
 
-   add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
+   add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
val, period_ns / 1000);
}
 
if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
-   add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
+   add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
intel_rps_get_requested_frequency(rps),
period_ns / 1000);
}
@@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
  

[PATCH 0/2] drm/i915/pmu: couple of cleanups

2023-05-23 Thread Ashutosh Dixit
Ashutosh Dixit (2):
  drm/i915/pmu: Turn off the timer to sample frequencies when GT is
parked
  drm/i915/pmu: Make PMU sample array two-dimensional

 drivers/gpu/drm/i915/i915_pmu.c | 72 +++--
 drivers/gpu/drm/i915/i915_pmu.h |  2 +-
 2 files changed, 24 insertions(+), 50 deletions(-)

-- 
2.38.0



[PATCH] drm/i915/perf: Clear out entire reports after reading if not power of 2 size

2023-05-22 Thread Ashutosh Dixit
Clearing out the report id and timestamp as a means to detect unlanded reports
only works if the report size is a power of 2. That is, only when report size is
a sub-multiple of the OA buffer size can we be certain that reports will
land at the same place each time in the OA buffer (after rewind). If report
size is not a power of 2, we need to zero out the entire report to be able
to detect unlanded reports reliably.

Cc: Umesh Nerlige Ramappa 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_perf.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 19d5652300eeb..58284156428dc 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
stream->oa_buffer.last_ctx_id = ctx_id;
}
 
-   /*
-* Clear out the report id and timestamp as a means to detect 
unlanded
-* reports.
-*/
-   oa_report_id_clear(stream, report32);
-   oa_timestamp_clear(stream, report32);
+   if (is_power_of_2(report_size)) {
+   /*
+* Clear out the report id and timestamp as a means
+* to detect unlanded reports.
+*/
+   oa_report_id_clear(stream, report32);
+   oa_timestamp_clear(stream, report32);
+   } else {
+   /* Zero out the entire report */
+   memset(report32, 0, report_size);
+   }
}
 
if (start_offset != *offset) {
-- 
2.38.0



[PATCH v2] drm/i915/hwmon: Silence UBSAN uninitialized bool variable warning

2023-05-12 Thread Ashutosh Dixit
Loading i915 on UBSAN enabled kernels (CONFIG_UBSAN/CONFIG_UBSAN_BOOL)
causes the following warning:

  UBSAN: invalid-load in drivers/gpu/drm/i915/gt/uc/intel_uc.c:558:2
  load of value 255 is not a valid value for type '_Bool'
  Call Trace:
   dump_stack_lvl+0x57/0x7d
   ubsan_epilogue+0x5/0x40
   __ubsan_handle_load_invalid_value.cold+0x43/0x48
   __uc_init_hw+0x76a/0x903 [i915]
   ...
   i915_driver_probe+0xfb1/0x1eb0 [i915]
   i915_pci_probe+0xbe/0x2d0 [i915]

The warning happens because, during probe, i915_hwmon is not yet available,
which results in the output boolean variable *old remaining
uninitialized. Silence the warning by initializing the variable to an
arbitrary value.

v2: Move variable initialization to the declaration (Andi)

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 1381943b8973d..c8b9cbb7ba3a9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -465,7 +465,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret, attempts;
-   bool pl1en;
+   bool pl1en = false;
 
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
-- 
2.38.0



[PATCH] drm/i915/pmu: Turn off the timer to sample frequencies when GT is parked

2023-05-11 Thread Ashutosh Dixit
pmu_needs_timer() keeps the timer running even when GT is parked,
ostensibly to sample requested/actual frequencies. However
frequency_sample() has the following:

/* Report 0/0 (actual/requested) frequency while parked. */
if (!intel_gt_pm_get_if_awake(gt))
return;

The above code prevents frequencies from being sampled while the GT is
parked, so we might as well turn off the sampling timer itself in this
case and save CPU cycles/power.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 7ece883a7d956..8db1d681cf4ab 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -124,11 +124,14 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
  ENGINE_SAMPLE_MASK;
 
/*
-* When the GPU is idle per-engine counters do not need to be
-* running so clear those bits out.
+* When GPU is idle, frequency or per-engine counters do not need
+* to be running so clear those bits out.
 */
-   if (!gpu_active)
-   enable &= ~ENGINE_SAMPLE_MASK;
+   if (!gpu_active) {
+   enable &= ~(config_mask(I915_PMU_ACTUAL_FREQUENCY) |
+   config_mask(I915_PMU_REQUESTED_FREQUENCY) |
+   ENGINE_SAMPLE_MASK);
+   }
/*
 * Also there is software busyness tracking available we do not
 * need the timer for I915_SAMPLE_BUSY counter.
-- 
2.38.0



[PATCH] drm/i915/hwmon: Silence UBSAN uninitialized bool variable warning

2023-05-10 Thread Ashutosh Dixit
Loading i915 on UBSAN enabled kernels (CONFIG_UBSAN/CONFIG_UBSAN_BOOL)
causes the following warning:

  UBSAN: invalid-load in drivers/gpu/drm/i915/gt/uc/intel_uc.c:558:2
  load of value 255 is not a valid value for type '_Bool'
  Call Trace:
   dump_stack_lvl+0x57/0x7d
   ubsan_epilogue+0x5/0x40
   __ubsan_handle_load_invalid_value.cold+0x43/0x48
   __uc_init_hw+0x76a/0x903 [i915]
   ...
   i915_driver_probe+0xfb1/0x1eb0 [i915]
   i915_pci_probe+0xbe/0x2d0 [i915]

The warning happens because, during probe, i915_hwmon is not yet available,
which results in the output boolean variable *old remaining
uninitialized. Silence the warning by initializing the variable to an
arbitrary value.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index a3bdd9f68a458..685663861bc0b 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -502,8 +502,11 @@ void i915_hwmon_power_max_disable(struct drm_i915_private 
*i915, bool *old)
struct i915_hwmon *hwmon = i915->hwmon;
u32 r;
 
-   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) {
+   /* Fix uninitialized bool variable warning */
+   *old = false;
return;
+   }
 
mutex_lock(&hwmon->hwmon_lock);
 
-- 
2.38.0



[PATCH v6 0/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-04-20 Thread Ashutosh Dixit
v6: Update Patch 3 to remove the timeout when blocked
v1-v5: Please see individual patches for revision history

Ashutosh Dixit (3):
  drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write
  drm/i915/guc: Disable PL1 power limit when loading GuC firmware
  drm/i915/hwmon: Block waiting for GuC reset to complete

 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++-
 drivers/gpu/drm/i915/i915_hwmon.c | 87 +++
 drivers/gpu/drm/i915/i915_hwmon.h |  7 +++
 3 files changed, 93 insertions(+), 14 deletions(-)

-- 
2.38.0



[PATCH 2/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-04-20 Thread Ashutosh Dixit
On dGfx, the PL1 power limit being enabled and set to a low value results
in a low GPU operating freq. It also negates the freq raise operation which
is done before GuC firmware load. As a result GuC firmware load can time
out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
limit was enabled and set to a low value). Therefore disable the PL1 power
limit when allowed by HW when loading GuC firmware.

v2:
 - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
 - Add hwm_power_max_restore to error return code path

v3 (Jani N):
 - Add/remove explanatory comments
 - Function renames
 - Type corrections
 - Locking annotation

v4:
 - Don't hold the lock across GuC reset (Rodrigo)
 - New locking scheme (suggested by Rodrigo)
 - Eliminate rpm_get in power_max_disable/restore, not needed (Tvrtko)

v5:
 - Fix uninitialized pl1en variable compile warning reported by kernel
   build robot by creating new err_rps label

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++--
 drivers/gpu/drm/i915/i915_hwmon.c | 40 +++
 drivers/gpu/drm/i915/i915_hwmon.h |  7 +
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cba..996168312340e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
 #include "intel_uc.h"
 
 #include "i915_drv.h"
+#include "i915_hwmon.h"
 
 static const struct intel_uc_ops uc_ops_off;
 static const struct intel_uc_ops uc_ops_on;
@@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret, attempts;
+   bool pl1en;
 
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
 
+   /* Disable a potentially low PL1 power limit to allow freq to be raised 
*/
+   i915_hwmon_power_max_disable(gt->i915, &pl1en);
+
intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
 
while (attempts--) {
@@ -500,7 +505,7 @@ static int __uc_init_hw(struct intel_uc *uc)
 */
ret = __uc_sanitize(uc);
if (ret)
-   goto err_out;
+   goto err_rps;
 
intel_huc_fw_upload(huc);
intel_guc_ads_reset(guc);
@@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
 
+   i915_hwmon_power_max_restore(gt->i915, pl1en);
+
guc_info(guc, "submission %s\n", 
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
guc_info(guc, "SLPC %s\n", 
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
 
@@ -559,10 +566,12 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_guc_submission_disable(guc);
 err_log_capture:
__uc_capture_load_err_log(uc);
-err_out:
+err_rps:
/* Return GT back to RPn */
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
 
+   i915_hwmon_power_max_restore(gt->i915, pl1en);
+err_out:
__uc_sanitize(uc);
 
if (!ret) {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 7f44e809ca155..9ab8971679fe3 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -50,6 +50,7 @@ struct hwm_drvdata {
struct hwm_energy_info ei;  /*  Energy info for 
energy1_input */
char name[12];
int gt_n;
+   bool reset_in_progress;
 };
 
 struct i915_hwmon {
@@ -400,6 +401,10 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
u32 nval;
 
mutex_lock(&hwmon->hwmon_lock);
+   if (hwmon->ddat.reset_in_progress) {
+   ret = -EAGAIN;
+   goto unlock;
+   }
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
 
/* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
@@ -421,6 +426,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
 exit:
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
+unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
 }
@@ -472,6 +478,40 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int 
chan, long val)
}
 }
 
+void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old)
+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+   u32 r;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   mutex_

[PATCH 3/3] drm/i915/hwmon: Block waiting for GuC reset to complete

2023-04-20 Thread Ashutosh Dixit
Instead of erroring out when GuC reset is in progress, block waiting for
GuC reset to complete which is a more reasonable uapi behavior.

v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
v3: Remove timeout when blocked (Tvrtko)

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 29 +
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 9ab8971679fe3..a3bdd9f68a458 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -51,6 +51,7 @@ struct hwm_drvdata {
char name[12];
int gt_n;
bool reset_in_progress;
+   wait_queue_head_t waitq;
 };
 
 struct i915_hwmon {
@@ -397,14 +398,32 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
struct i915_hwmon *hwmon = ddat->hwmon;
intel_wakeref_t wakeref;
+   DEFINE_WAIT(wait);
int ret = 0;
u32 nval;
 
-   mutex_lock(&hwmon->hwmon_lock);
-   if (hwmon->ddat.reset_in_progress) {
-   ret = -EAGAIN;
-   goto unlock;
+   /* Block waiting for GuC reset to complete when needed */
+   for (;;) {
+   mutex_lock(&hwmon->hwmon_lock);
+
+   prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
+
+   if (!hwmon->ddat.reset_in_progress)
+   break;
+
+   if (signal_pending(current)) {
+   ret = -EINTR;
+   break;
+   }
+
+   mutex_unlock(&hwmon->hwmon_lock);
+
+   schedule();
}
+   finish_wait(&ddat->waitq, &wait);
+   if (ret)
+   goto unlock;
+
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
 
/* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
@@ -508,6 +527,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private 
*i915, bool old)
intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
hwmon->ddat.reset_in_progress = false;
+   wake_up_all(&hwmon->ddat.waitq);
 
mutex_unlock(&hwmon->hwmon_lock);
 }
@@ -784,6 +804,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
ddat->uncore = &i915->uncore;
snprintf(ddat->name, sizeof(ddat->name), "i915");
ddat->gt_n = -1;
+   init_waitqueue_head(&ddat->waitq);
 
for_each_gt(gt, i915, i) {
ddat_gt = hwmon->ddat_gt + i;
-- 
2.38.0



[PATCH 1/3] drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write

2023-04-20 Thread Ashutosh Dixit
In preparation for follow-on patches, refactor hwm_power_max_write to take
hwmon_lock and runtime pm wakeref at start of the function and release them
at the end, therefore acquiring these just once each.

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 8e7dccc8d3a0e..7f44e809ca155 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -396,31 +396,33 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
struct i915_hwmon *hwmon = ddat->hwmon;
intel_wakeref_t wakeref;
+   int ret = 0;
u32 nval;
 
+   mutex_lock(&hwmon->hwmon_lock);
+   wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
+
/* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
if (val == PL1_DISABLE) {
-   mutex_lock(&hwmon->hwmon_lock);
-   with_intel_runtime_pm(ddat->uncore->rpm, wakeref) {
-   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
-PKG_PWR_LIM_1_EN, 0);
-   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
-   }
-   mutex_unlock(&hwmon->hwmon_lock);
+   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN, 0);
+   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
 
if (nval & PKG_PWR_LIM_1_EN)
-   return -ENODEV;
-   return 0;
+   ret = -ENODEV;
+   goto exit;
}
 
/* Computation in 64-bits to avoid overflow. Round to nearest. */
nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
 
-   hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1,
-   nval);
-   return 0;
+   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
+exit:
+   intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
+   mutex_unlock(&hwmon->hwmon_lock);
+   return ret;
 }
 
 static int
-- 
2.38.0



[PATCH 3/3] drm/i915/hwmon: Block waiting for GuC reset to complete

2023-04-10 Thread Ashutosh Dixit
Instead of erroring out when GuC reset is in progress, block waiting for
GuC reset to complete which is a more reasonable uapi behavior.

v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 38 +++
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 9ab8971679fe3..8471a667dfc71 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -51,6 +51,7 @@ struct hwm_drvdata {
char name[12];
int gt_n;
bool reset_in_progress;
+   wait_queue_head_t waitq;
 };
 
 struct i915_hwmon {
@@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
 static int
 hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
+#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
+
+   int ret = 0, timeout = GUC_RESET_TIMEOUT;
struct i915_hwmon *hwmon = ddat->hwmon;
intel_wakeref_t wakeref;
-   int ret = 0;
+   DEFINE_WAIT(wait);
u32 nval;
 
-   mutex_lock(&hwmon->hwmon_lock);
-   if (hwmon->ddat.reset_in_progress) {
-   ret = -EAGAIN;
-   goto unlock;
+   /* Block waiting for GuC reset to complete when needed */
+   for (;;) {
+   mutex_lock(&hwmon->hwmon_lock);
+
+   prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
+
+   if (!hwmon->ddat.reset_in_progress)
+   break;
+
+   if (signal_pending(current)) {
+   ret = -EINTR;
+   break;
+   }
+
+   if (!timeout) {
+   ret = -ETIME;
+   break;
+   }
+
+   mutex_unlock(&hwmon->hwmon_lock);
+
+   timeout = schedule_timeout(timeout);
}
+   finish_wait(&ddat->waitq, &wait);
+   if (ret)
+   goto unlock;
+
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
 
/* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
@@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private 
*i915, bool old)
intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
hwmon->ddat.reset_in_progress = false;
+   wake_up_all(&hwmon->ddat.waitq);
 
mutex_unlock(&hwmon->hwmon_lock);
 }
@@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
ddat->uncore = &i915->uncore;
snprintf(ddat->name, sizeof(ddat->name), "i915");
ddat->gt_n = -1;
+   init_waitqueue_head(&ddat->waitq);
 
for_each_gt(gt, i915, i) {
ddat_gt = hwmon->ddat_gt + i;
-- 
2.38.0



[PATCH 1/3] drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write

2023-04-10 Thread Ashutosh Dixit
In preparation for follow-on patches, refactor hwm_power_max_write to take
hwmon_lock and runtime pm wakeref at start of the function and release them
at the end, therefore acquiring these just once each.

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 8e7dccc8d3a0e..7f44e809ca155 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -396,31 +396,33 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
struct i915_hwmon *hwmon = ddat->hwmon;
intel_wakeref_t wakeref;
+   int ret = 0;
u32 nval;
 
+   mutex_lock(&hwmon->hwmon_lock);
+   wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
+
/* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
if (val == PL1_DISABLE) {
-   mutex_lock(&hwmon->hwmon_lock);
-   with_intel_runtime_pm(ddat->uncore->rpm, wakeref) {
-   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
-PKG_PWR_LIM_1_EN, 0);
-   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
-   }
-   mutex_unlock(&hwmon->hwmon_lock);
+   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN, 0);
+   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
 
if (nval & PKG_PWR_LIM_1_EN)
-   return -ENODEV;
-   return 0;
+   ret = -ENODEV;
+   goto exit;
}
 
/* Computation in 64-bits to avoid overflow. Round to nearest. */
nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
 
-   hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1,
-   nval);
-   return 0;
+   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
+exit:
+   intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
+   mutex_unlock(&hwmon->hwmon_lock);
+   return ret;
 }
 
 static int
-- 
2.38.0



[PATCH 0/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-04-10 Thread Ashutosh Dixit
Updates to Patch 2/3 and Patch 3/3 in this version.

Ashutosh Dixit (3):
  drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write
  drm/i915/guc: Disable PL1 power limit when loading GuC firmware
  drm/i915/hwmon: Block waiting for GuC reset to complete

 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++-
 drivers/gpu/drm/i915/i915_hwmon.c | 94 +++
 drivers/gpu/drm/i915/i915_hwmon.h |  7 ++
 3 files changed, 100 insertions(+), 14 deletions(-)

-- 
2.38.0



[PATCH 2/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-04-10 Thread Ashutosh Dixit
On dGfx, the PL1 power limit being enabled and set to a low value results
in a low GPU operating freq. It also negates the freq raise operation which
is done before GuC firmware load. As a result GuC firmware load can time
out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
limit was enabled and set to a low value). Therefore disable the PL1 power
limit when allowed by HW when loading GuC firmware.

v2:
 - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
 - Add hwm_power_max_restore to error return code path

v3 (Jani N):
 - Add/remove explanatory comments
 - Function renames
 - Type corrections
 - Locking annotation

v4:
 - Don't hold the lock across GuC reset (Rodrigo)
 - New locking scheme (suggested by Rodrigo)
 - Eliminate rpm_get in power_max_disable/restore, not needed (Tvrtko)

v5:
 - Fix uninitialized pl1en variable compile warning reported by kernel
   build robot by creating new err_rps label

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 13 +++--
 drivers/gpu/drm/i915/i915_hwmon.c | 40 +++
 drivers/gpu/drm/i915/i915_hwmon.h |  7 +
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cba..996168312340e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
 #include "intel_uc.h"
 
 #include "i915_drv.h"
+#include "i915_hwmon.h"
 
 static const struct intel_uc_ops uc_ops_off;
 static const struct intel_uc_ops uc_ops_on;
@@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret, attempts;
+   bool pl1en;
 
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
 
+   /* Disable a potentially low PL1 power limit to allow freq to be raised 
*/
+   i915_hwmon_power_max_disable(gt->i915, &pl1en);
+
intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
 
while (attempts--) {
@@ -500,7 +505,7 @@ static int __uc_init_hw(struct intel_uc *uc)
 */
ret = __uc_sanitize(uc);
if (ret)
-   goto err_out;
+   goto err_rps;
 
intel_huc_fw_upload(huc);
intel_guc_ads_reset(guc);
@@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
 
+   i915_hwmon_power_max_restore(gt->i915, pl1en);
+
guc_info(guc, "submission %s\n", 
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
guc_info(guc, "SLPC %s\n", 
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
 
@@ -559,10 +566,12 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_guc_submission_disable(guc);
 err_log_capture:
__uc_capture_load_err_log(uc);
-err_out:
+err_rps:
/* Return GT back to RPn */
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
 
+   i915_hwmon_power_max_restore(gt->i915, pl1en);
+err_out:
__uc_sanitize(uc);
 
if (!ret) {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 7f44e809ca155..9ab8971679fe3 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -50,6 +50,7 @@ struct hwm_drvdata {
struct hwm_energy_info ei;  /*  Energy info for 
energy1_input */
char name[12];
int gt_n;
+   bool reset_in_progress;
 };
 
 struct i915_hwmon {
@@ -400,6 +401,10 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
u32 nval;
 
mutex_lock(&hwmon->hwmon_lock);
+   if (hwmon->ddat.reset_in_progress) {
+   ret = -EAGAIN;
+   goto unlock;
+   }
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
 
/* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
@@ -421,6 +426,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
 exit:
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
+unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
 }
@@ -472,6 +478,40 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int 
chan, long val)
}
 }
 
+void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old)
+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+   u32 r;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   mutex_

[PATCH 2/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-04-05 Thread Ashutosh Dixit
On dGfx, the PL1 power limit being enabled and set to a low value results
in a low GPU operating freq. It also negates the freq raise operation which
is done before GuC firmware load. As a result GuC firmware load can time
out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
limit was enabled and set to a low value). Therefore disable the PL1 power
limit when allowed by HW when loading GuC firmware.

v2:
 - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
 - Add hwm_power_max_restore to error return code path

v3 (Jani N):
 - Add/remove explanatory comments
 - Function renames
 - Type corrections
 - Locking annotation

v4:
 - Don't hold the lock across GuC reset (Rodrigo)
 - New locking scheme (suggested by Rodrigo)
 - Eliminate rpm_get in power_max_disable/restore, not needed (Tvrtko)

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 ++
 drivers/gpu/drm/i915/i915_hwmon.c | 40 +++
 drivers/gpu/drm/i915/i915_hwmon.h |  7 +
 3 files changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cba..aa8e35a5636a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
 #include "intel_uc.h"
 
 #include "i915_drv.h"
+#include "i915_hwmon.h"
 
 static const struct intel_uc_ops uc_ops_off;
 static const struct intel_uc_ops uc_ops_on;
@@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret, attempts;
+   bool pl1en;
 
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
 
+   /* Disable a potentially low PL1 power limit to allow freq to be raised 
*/
+   i915_hwmon_power_max_disable(gt->i915, &pl1en);
+
intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
 
while (attempts--) {
@@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
 
+   i915_hwmon_power_max_restore(gt->i915, pl1en);
+
guc_info(guc, "submission %s\n", 
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
guc_info(guc, "SLPC %s\n", 
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
 
@@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc)
/* Return GT back to RPn */
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
 
+   i915_hwmon_power_max_restore(gt->i915, pl1en);
+
__uc_sanitize(uc);
 
if (!ret) {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 7f44e809ca155..9ab8971679fe3 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -50,6 +50,7 @@ struct hwm_drvdata {
struct hwm_energy_info ei;  /*  Energy info for 
energy1_input */
char name[12];
int gt_n;
+   bool reset_in_progress;
 };
 
 struct i915_hwmon {
@@ -400,6 +401,10 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
u32 nval;
 
mutex_lock(&hwmon->hwmon_lock);
+   if (hwmon->ddat.reset_in_progress) {
+   ret = -EAGAIN;
+   goto unlock;
+   }
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
 
/* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
@@ -421,6 +426,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
 exit:
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
+unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
 }
@@ -472,6 +478,40 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int 
chan, long val)
}
 }
 
+void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old)
+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+   u32 r;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   mutex_lock(&hwmon->hwmon_lock);
+
+   hwmon->ddat.reset_in_progress = true;
+   r = intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN, 0);
+   *old = !!(r & PKG_PWR_LIM_1_EN);
+
+   mutex_unlock(&hwmon->hwmon_lock);
+}
+
+void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   mutex_lock(&hwmon->hwmon_lock);
+
+   intel_uncore_rmw(hwmon->ddat.uncore, hwm

[PATCH 1/3] drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write

2023-04-05 Thread Ashutosh Dixit
In preparation for follow-on patches, refactor hwm_power_max_write to take
hwmon_lock and runtime pm wakeref at start of the function and release them
at the end, therefore acquiring these just once each.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 8e7dccc8d3a0e..7f44e809ca155 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -396,31 +396,33 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
struct i915_hwmon *hwmon = ddat->hwmon;
intel_wakeref_t wakeref;
+   int ret = 0;
u32 nval;
 
+   mutex_lock(&hwmon->hwmon_lock);
+   wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
+
/* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
if (val == PL1_DISABLE) {
-   mutex_lock(&hwmon->hwmon_lock);
-   with_intel_runtime_pm(ddat->uncore->rpm, wakeref) {
-   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
-PKG_PWR_LIM_1_EN, 0);
-   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
-   }
-   mutex_unlock(&hwmon->hwmon_lock);
+   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN, 0);
+   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
 
if (nval & PKG_PWR_LIM_1_EN)
-   return -ENODEV;
-   return 0;
+   ret = -ENODEV;
+   goto exit;
}
 
/* Computation in 64-bits to avoid overflow. Round to nearest. */
nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
 
-   hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1,
-   nval);
-   return 0;
+   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
+exit:
+   intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
+   mutex_unlock(&hwmon->hwmon_lock);
+   return ret;
 }
 
 static int
-- 
2.38.0



[PATCH 3/3] drm/i915/hwmon: Block waiting for GuC reset to complete

2023-04-05 Thread Ashutosh Dixit
Instead of erroring out when GuC reset is in progress, block waiting for
GuC reset to complete which is a more reasonable uapi behavior.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 9ab8971679fe3..4343efb48e61b 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -51,6 +51,7 @@ struct hwm_drvdata {
char name[12];
int gt_n;
bool reset_in_progress;
+   wait_queue_head_t wqh;
 };
 
 struct i915_hwmon {
@@ -400,10 +401,15 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
int ret = 0;
u32 nval;
 
+retry:
mutex_lock(&hwmon->hwmon_lock);
if (hwmon->ddat.reset_in_progress) {
-   ret = -EAGAIN;
-   goto unlock;
+   mutex_unlock(&hwmon->hwmon_lock);
+   ret = wait_event_interruptible(ddat->wqh,
+  !hwmon->ddat.reset_in_progress);
+   if (ret)
+   return ret;
+   goto retry;
}
wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
 
@@ -426,7 +432,6 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
 exit:
intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
-unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
 }
@@ -508,6 +513,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private 
*i915, bool old)
intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
 PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
hwmon->ddat.reset_in_progress = false;
+   wake_up_all(&hwmon->ddat.wqh);
 
mutex_unlock(&hwmon->hwmon_lock);
 }
@@ -784,6 +790,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
ddat->uncore = &i915->uncore;
snprintf(ddat->name, sizeof(ddat->name), "i915");
ddat->gt_n = -1;
+   init_waitqueue_head(&ddat->wqh);
 
for_each_gt(gt, i915, i) {
ddat_gt = hwmon->ddat_gt + i;
-- 
2.38.0



[PATCH v4 0/3] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-04-05 Thread Ashutosh Dixit
Split the v3 patch into 3 patches for easier review, can squash later if needed.

Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 

Ashutosh Dixit (3):
  drm/i915/hwmon: Get mutex and rpm ref just once in hwm_power_max_write
  drm/i915/guc: Disable PL1 power limit when loading GuC firmware
  drm/i915/hwmon: Block waiting for GuC reset to complete

 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 
 drivers/gpu/drm/i915/i915_hwmon.c | 75 ++-
 drivers/gpu/drm/i915/i915_hwmon.h |  7 +++
 3 files changed, 78 insertions(+), 13 deletions(-)

-- 
2.38.0



[PATCH v3] drm/i915/hwmon: Use 0 to designate disabled PL1 power limit

2023-03-31 Thread Ashutosh Dixit
On ATSM the PL1 limit is disabled at power up. The previous uapi assumed
that the PL1 limit is always enabled and therefore did not have a notion of
a disabled PL1 limit. This results in erroneous PL1 limit values when the
PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit
was previously shown as 0 which means a low PL1 limit whereas the limit
being disabled actually implies a high effective PL1 limit value.

To get round this problem, the PL1 limit uapi is expanded to include a
special value 0 to designate a disabled PL1 limit. A read value of 0 means
that the PL1 power limit is disabled, writing 0 disables the limit.

The link between this patch and the bugs mentioned below is as follows:
* Because on ATSM the PL1 power limit is disabled on power up and there
  were no means to enable it, we previously implemented the means to
  enable the limit when the PL1 hwmon entry (power1_max) was written to.
* Now there is an IGT igt@i915_hwmon@hwmon_write which (a) reads orig value
  from all hwmon sysfs  (b) does a bunch of random writes and finally (c)
  restores the orig value read. On ATSM since the orig value is 0, when
  the IGT restores the 0 value, the PL1 limit is now enabled with a value
  of 0.
* PL1 limit of 0 implies a low PL1 limit which causes GPU freq to fall to
  100 MHz. This causes GuC FW load and several IGTs to start timing out
  and gives rise to these Intel CI bugs. After this patch, writing 0 would
  disable the PL1 limit instead of enabling it, avoiding the freq drop
  issue.

v2: Add explanation for bugs mentioned below (Rodrigo)
v3: Eliminate race during PL1 disable and verify (Tvrtko)
Change return to -ENODEV if verify fails (Tvrtko)

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8060
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 .../ABI/testing/sysfs-driver-intel-i915-hwmon |  4 ++-
 drivers/gpu/drm/i915/i915_hwmon.c | 26 +++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index 2d6a472eef885..8d7d8f05f6cd0 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -14,7 +14,9 @@ Description:  RW. Card reactive sustained  (PL1/Tau) power 
limit in microwatts.
 
The power controller will throttle the operating frequency
if the power averaged over a window (typically seconds)
-   exceeds this limit.
+   exceeds this limit. A read value of 0 means that the PL1
+   power limit is disabled, writing 0 disables the
+   limit. Writing values > 0 will enable the power limit.
 
Only supported for particular Intel i915 graphics platforms.
 
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 596dd2c070106..8e7dccc8d3a0e 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -349,6 +349,8 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 
attr, int chan)
}
 }
 
+#define PL1_DISABLE 0
+
 /*
  * HW allows arbitrary PL1 limits to be set but silently clamps these values to
  * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the
@@ -362,6 +364,14 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
intel_wakeref_t wakeref;
u64 r, min, max;
 
+   /* Check if PL1 limit is disabled */
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
+   if (!(r & PKG_PWR_LIM_1_EN)) {
+   *val = PL1_DISABLE;
+   return 0;
+   }
+
*val = hwm_field_read_and_scale(ddat,
hwmon->rg.pkg_rapl_limit,
PKG_PWR_LIM_1,
@@ -385,8 +395,24 @@ static int
 hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
struct i915_hwmon *hwmon = ddat->hwmon;
+   intel_wakeref_t wakeref;
u32 nval;
 
+   /* Disable PL1 limit and verify, because the limit cannot be disabled 
on all platforms */
+   if (val == PL1_DISABLE) {
+   mutex_lock(&hwmon->hwmon_lock);
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref) {
+   intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN, 0);
+   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
+   }
+   mutex_unlock(&hwmon->hwmon_lock);
+
+   if (nval & PKG_PWR_LIM_1_EN)
+   return -ENODEV;
+   return 0;
+   }
+
/* Computation 

[PATCH v2] drm/i915/hwmon: Use 0 to designate disabled PL1 power limit

2023-03-30 Thread Ashutosh Dixit
On ATSM the PL1 limit is disabled at power up. The previous uapi assumed
that the PL1 limit is always enabled and therefore did not have a notion of
a disabled PL1 limit. This results in erroneous PL1 limit values when the
PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit
was previously shown as 0 which means a low PL1 limit whereas the limit
being disabled actually implies a high effective PL1 limit value.

To get round this problem, the PL1 limit uapi is expanded to include a
special value 0 to designate a disabled PL1 limit. A read value of 0 means
that the PL1 power limit is disabled, writing 0 disables the limit.

The link between this patch and the bugs mentioned below is as follows:
* Because on ATSM the PL1 power limit is disabled on power up and there
  were no means to enable it, we previously implemented the means to
  enable the limit when the PL1 hwmon entry (power1_max) was written to.
* Now there is an IGT igt@i915_hwmon@hwmon_write which (a) reads orig value
  from all hwmon sysfs  (b) does a bunch of random writes and finally (c)
  restores the orig value read. On ATSM since the orig value is 0, when
  the IGT restores the 0 value, the PL1 limit is now enabled with a value
  of 0.
* PL1 limit of 0 implies a low PL1 limit which causes GPU freq to fall to
  100 MHz. This causes GuC FW load and several IGTs to start timing out
  and gives rise to these Intel CI bugs. After this patch, writing 0 would
  disable the PL1 limit instead of enabling it, avoiding the freq drop
  issue.

v2: Add explanation for bugs mentioned below (Rodrigo)

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8060
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 .../ABI/testing/sysfs-driver-intel-i915-hwmon |  4 +++-
 drivers/gpu/drm/i915/i915_hwmon.c | 24 +++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index 2d6a472eef885..8d7d8f05f6cd0 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -14,7 +14,9 @@ Description:  RW. Card reactive sustained  (PL1/Tau) power 
limit in microwatts.
 
The power controller will throttle the operating frequency
if the power averaged over a window (typically seconds)
-   exceeds this limit.
+   exceeds this limit. A read value of 0 means that the PL1
+   power limit is disabled, writing 0 disables the
+   limit. Writing values > 0 will enable the power limit.
 
Only supported for particular Intel i915 graphics platforms.
 
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 596dd2c070106..c099057888914 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -349,6 +349,8 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 
attr, int chan)
}
 }
 
+#define PL1_DISABLE 0
+
 /*
  * HW allows arbitrary PL1 limits to be set but silently clamps these values to
  * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the
@@ -362,6 +364,14 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
intel_wakeref_t wakeref;
u64 r, min, max;
 
+   /* Check if PL1 limit is disabled */
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
+   if (!(r & PKG_PWR_LIM_1_EN)) {
+   *val = PL1_DISABLE;
+   return 0;
+   }
+
*val = hwm_field_read_and_scale(ddat,
hwmon->rg.pkg_rapl_limit,
PKG_PWR_LIM_1,
@@ -385,8 +395,22 @@ static int
 hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
struct i915_hwmon *hwmon = ddat->hwmon;
+   intel_wakeref_t wakeref;
u32 nval;
 
+   if (val == PL1_DISABLE) {
+   /* Disable PL1 limit */
+   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1_EN, 0);
+
+   /* Verify, because PL1 limit cannot be disabled on all 
platforms */
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
+   if (nval & PKG_PWR_LIM_1_EN)
+   return -EPERM;
+   return 0;
+   }
+
/* Computation in 64-bits to avoid overflow. Round to nearest. */
nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
-- 
2.38.0



[PATCH] drm/i915/hwmon: Use 0 to designate disabled PL1 power limit

2023-03-28 Thread Ashutosh Dixit
On ATSM the PL1 limit is disabled at power up. The previous uapi assumed
that the PL1 limit is always enabled and therefore did not have a notion of
a disabled PL1 limit. This results in erroneous PL1 limit values when the
PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit
was previously shown as 0 which means a low PL1 limit whereas the limit
being disabled actually implies a high effective PL1 limit value.

To get round this problem, the PL1 limit uapi is expanded to include a
special value 0 to designate a disabled PL1 limit.

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/8060
Signed-off-by: Ashutosh Dixit 
---
 .../ABI/testing/sysfs-driver-intel-i915-hwmon |  3 ++-
 drivers/gpu/drm/i915/i915_hwmon.c | 24 +++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index 2d6a472eef885..96fec0bb74c2c 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -14,7 +14,8 @@ Description:  RW. Card reactive sustained  (PL1/Tau) power 
limit in microwatts.
 
The power controller will throttle the operating frequency
if the power averaged over a window (typically seconds)
-   exceeds this limit.
+   exceeds this limit. A read value of 0 means that the PL1 power
+   limit is disabled. Writing 0 disables the limit if possible.
 
Only supported for particular Intel i915 graphics platforms.
 
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 596dd2c070106..c099057888914 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -349,6 +349,8 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 
attr, int chan)
}
 }
 
+#define PL1_DISABLE 0
+
 /*
  * HW allows arbitrary PL1 limits to be set but silently clamps these values to
  * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the
@@ -362,6 +364,14 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
intel_wakeref_t wakeref;
u64 r, min, max;
 
+   /* Check if PL1 limit is disabled */
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
+   if (!(r & PKG_PWR_LIM_1_EN)) {
+   *val = PL1_DISABLE;
+   return 0;
+   }
+
*val = hwm_field_read_and_scale(ddat,
hwmon->rg.pkg_rapl_limit,
PKG_PWR_LIM_1,
@@ -385,8 +395,22 @@ static int
 hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 {
struct i915_hwmon *hwmon = ddat->hwmon;
+   intel_wakeref_t wakeref;
u32 nval;
 
+   if (val == PL1_DISABLE) {
+   /* Disable PL1 limit */
+   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1_EN, 0);
+
+   /* Verify, because PL1 limit cannot be disabled on all 
platforms */
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   nval = intel_uncore_read(ddat->uncore, 
hwmon->rg.pkg_rapl_limit);
+   if (nval & PKG_PWR_LIM_1_EN)
+   return -EPERM;
+   return 0;
+   }
+
/* Computation in 64-bits to avoid overflow. Round to nearest. */
nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
-- 
2.38.0



[PATCH] Revert "drm/i915/hwmon: Enable PL1 power limit"

2023-03-19 Thread Ashutosh Dixit
This reverts commit ee892ea83d99610fa33bea612de058e0955eec3a.

0349c41b0596 ("drm/i915/hwmon: Enable PL1 power limit") was reverted in
05d5562e401e ("Revert "drm/i915/hwmon: Enable PL1 power limit"") but has
appeared again as ee892ea83d99 ("drm/i915/hwmon: Enable PL1 power
limit"). Revert it again.

Cc: <stable@vger.kernel.org> # v6.2+
Cc: Jani Nikula 
Cc: Rodrigo Vivi 
Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Fixes: ee892ea83d99 ("drm/i915/hwmon: Enable PL1 power limit")
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index ee63a8fd88fc1..596dd2c070106 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -688,11 +688,6 @@ hwm_get_preregistration_info(struct drm_i915_private *i915)
for_each_gt(gt, i915, i)
hwm_energy(>ddat_gt[i], );
}
-
-   /* Enable PL1 power limit */
-   if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
-   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1_EN, 
PKG_PWR_LIM_1_EN);
 }
 
 void i915_hwmon_register(struct drm_i915_private *i915)
-- 
2.38.0



[PATCH] Revert "drm/i915/hwmon: Enable PL1 power limit"

2023-03-18 Thread Ashutosh Dixit
This reverts commit ee892ea83d99610fa33bea612de058e0955eec3a.

0349c41b0596 ("drm/i915/hwmon: Enable PL1 power limit") was reverted in
05d5562e401e ("Revert "drm/i915/hwmon: Enable PL1 power limit"") but has
appeared again as ee892ea83d99 ("drm/i915/hwmon: Enable PL1 power
limit"). Revert it again.

Cc: Jani Nikula 
Cc: Rodrigo Vivi 
Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Fixes: ee892ea83d99 ("drm/i915/hwmon: Enable PL1 power limit")
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index ee63a8fd88fc1..596dd2c070106 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -688,11 +688,6 @@ hwm_get_preregistration_info(struct drm_i915_private *i915)
for_each_gt(gt, i915, i)
hwm_energy(>ddat_gt[i], );
}
-
-   /* Enable PL1 power limit */
-   if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
-   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1_EN, 
PKG_PWR_LIM_1_EN);
 }
 
 void i915_hwmon_register(struct drm_i915_private *i915)
-- 
2.38.0



[PATCH] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-03-15 Thread Ashutosh Dixit
On dGfx, the PL1 power limit being enabled and set to a low value results
in a low GPU operating freq. It also negates the freq raise operation which
is done before GuC firmware load. As a result GuC firmware load can time
out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
limit was enabled and set to a low value). Therefore disable the PL1 power
limit when allowed by HW when loading GuC firmware.

v2:
 - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
 - Add hwm_power_max_restore to error return code path

v3 (Jani N):
 - Add/remove explanatory comments
 - Function renames
 - Type corrections
 - Locking annotation

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 +++
 drivers/gpu/drm/i915/i915_hwmon.c | 39 +++
 drivers/gpu/drm/i915/i915_hwmon.h |  7 +
 3 files changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cba..aa8e35a5636a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
 #include "intel_uc.h"
 
 #include "i915_drv.h"
+#include "i915_hwmon.h"
 
 static const struct intel_uc_ops uc_ops_off;
 static const struct intel_uc_ops uc_ops_on;
@@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret, attempts;
+   bool pl1en;
 
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
 
+   /* Disable a potentially low PL1 power limit to allow freq to be raised 
*/
+   i915_hwmon_power_max_disable(gt->i915, &pl1en);
+
intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
 
while (attempts--) {
@@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
}
 
+   i915_hwmon_power_max_restore(gt->i915, pl1en);
+
guc_info(guc, "submission %s\n", 
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
guc_info(guc, "SLPC %s\n", 
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
 
@@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc)
/* Return GT back to RPn */
intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
 
+   i915_hwmon_power_max_restore(gt->i915, pl1en);
+
__uc_sanitize(uc);
 
if (!ret) {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index ee63a8fd88fc1..769b5bda4d53f 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int 
chan, long val)
}
 }
 
+void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old)
+   __acquires(i915->hwmon->hwmon_lock)
+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+   intel_wakeref_t wakeref;
+   u32 r;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   /* Take mutex to prevent concurrent hwm_power_max_write */
+   mutex_lock(&hwmon->hwmon_lock);
+
+   with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
+   r = intel_uncore_rmw(hwmon->ddat.uncore,
+hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN, 0);
+
+   *old = !!(r & PKG_PWR_LIM_1_EN);
+}
+
+void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
+   __releases(i915->hwmon->hwmon_lock)
+{
+   struct i915_hwmon *hwmon = i915->hwmon;
+   intel_wakeref_t wakeref;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
+   intel_uncore_rmw(hwmon->ddat.uncore,
+hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN,
+old ? PKG_PWR_LIM_1_EN : 0);
+
+   mutex_unlock(&hwmon->hwmon_lock);
+}
+
 static umode_t
 hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
 {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.h 
b/drivers/gpu/drm/i915/i915_hwmon.h
index 7ca9cf2c34c96..0fcb7de844061 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.h
+++ b/drivers/gpu/drm/i915/i915_hwmon.h
@@ -7,14 +7,21 @@
 #ifndef __I915_HWMON_H__
 #define __I915_HWMON_H__
 
+#include <linux/types.h>
+
 struct drm_i915_private;
+struct intel_gt;
 
 #if IS_REACHABLE(CONFIG_HWMON)
 void i915_hwmon_register(struct drm_i915_private *i915);
 void i915_hwmon_unregister(struct drm_i915_pri

[PATCH] drm/i915/pmu: Use functions common with sysfs to read actual freq

2023-03-15 Thread Ashutosh Dixit
Expose intel_rps_read_actual_frequency_fw to read the actual freq without
taking forcewake for use by PMU. The code is refactored to use a common set
of functions across sysfs and PMU. Using common functions with sysfs in PMU
solves the issues of missing support for MTL and missing support for older
generations (prior to Gen6). It also future proofs the PMU where sometimes
code has been updated for sysfs and PMU has been missed.

v2: Remove runtime_pm_if_in_use from read_actual_frequency_fw (Tvrtko)

v3: (Tvrtko)
 - Remove goto in __read_cagf
 - Unexport intel_rps_get_cagf and intel_rps_read_punit_req

Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL")
Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8280
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 38 -
 drivers/gpu/drm/i915/gt/intel_rps.h |  4 +--
 drivers/gpu/drm/i915/i915_pmu.c | 10 +++-
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 4d0dc9de23f96..6d7395aa404a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
 }
 
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
-{
-   struct drm_i915_private *i915 = rps_to_i915(rps);
-   i915_reg_t rpstat;
-
-   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
-
-   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
-}
-
 u32 intel_rps_read_rpstat(struct intel_rps *rps)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -2066,7 +2056,7 @@ u32 intel_rps_read_rpstat(struct intel_rps *rps)
return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
 }
 
-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+static u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
@@ -2089,10 +2079,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 
rpstat)
return cagf;
 }
 
-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+   i915_reg_t r = INVALID_MMIO_REG;
u32 freq;
 
/*
@@ -2100,22 +2091,30 @@ static u32 read_cagf(struct intel_rps *rps)
 * registers will return 0 freq when GT is in RC6
 */
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
-   freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+   r = MTL_MIRROR_TARGET_WP1;
} else if (GRAPHICS_VER(i915) >= 12) {
-   freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+   r = GEN12_RPSTAT1;
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
} else if (GRAPHICS_VER(i915) >= 6) {
-   freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+   r = GEN6_RPSTAT1;
} else {
-   freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+   r = MEMSTAT_ILK;
}
 
+   if (i915_mmio_reg_valid(r))
+   freq = take_fw ? intel_uncore_read(uncore, r) : 
intel_uncore_read_fw(uncore, r);
+
return intel_rps_get_cagf(rps, freq);
 }
 
+static u32 read_cagf(struct intel_rps *rps)
+{
+   return __read_cagf(rps, true);
+}
+
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
 {
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2128,7 +2127,12 @@ u32 intel_rps_read_actual_frequency(struct intel_rps 
*rps)
return freq;
 }
 
-u32 intel_rps_read_punit_req(struct intel_rps *rps)
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+   return intel_gpu_freq(rps, __read_cagf(rps, false));
+}
+
+static u32 intel_rps_read_punit_req(struct intel_rps *rps)
 {
struct intel_uncore *uncore = rps_to_uncore(rps);
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index c622962c6befb..a3fa987aa91f1 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -37,8 +37,8 @@ void intel_rps_mark_interactive(struct intel_rps *rps, bool 
interactive);
 
 int intel_gpu_freq(struct intel_rps *rps, int val);
 int intel_freq_opcode(struct intel_rps *rps, int val);
-u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
 u32 intel_rps_get_re

[PATCH v2] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-03-13 Thread Ashutosh Dixit
On dGfx, the PL1 power limit being enabled and set to a low value results
in a low GPU operating freq. It also negates the freq raise operation which
is done before GuC firmware load. As a result GuC firmware load can time
out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
limit was enabled and set to a low value). Therefore, when the HW allows it,
disable the PL1 power limit while loading GuC firmware.

v2:
 - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
 - Add hwm_power_max_restore to error return code path

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 10 ++-
 drivers/gpu/drm/i915/i915_hwmon.c | 39 +++
 drivers/gpu/drm/i915/i915_hwmon.h |  7 +
 3 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cb..15f8e94edc61 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
 #include "intel_uc.h"
 
 #include "i915_drv.h"
+#include "i915_hwmon.h"
 
 static const struct intel_uc_ops uc_ops_off;
 static const struct intel_uc_ops uc_ops_on;
@@ -460,7 +461,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct drm_i915_private *i915 = gt->i915;
struct intel_guc *guc = >guc;
struct intel_huc *huc = >huc;
-   int ret, attempts;
+   int ret, attempts, pl1en;
 
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -491,6 +492,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
 
+   /* Disable PL1 limit before raising freq */
+   hwm_power_max_disable(gt, &pl1en);
+
intel_rps_raise_unslice(_to_gt(uc)->rps);
 
while (attempts--) {
@@ -547,6 +551,8 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(_to_gt(uc)->rps);
}
 
+   hwm_power_max_restore(gt, pl1en); /* Restore PL1 limit */
+
guc_info(guc, "submission %s\n", 
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
guc_info(guc, "SLPC %s\n", 
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
 
@@ -563,6 +569,8 @@ static int __uc_init_hw(struct intel_uc *uc)
/* Return GT back to RPn */
intel_rps_lower_unslice(_to_gt(uc)->rps);
 
+   hwm_power_max_restore(gt, pl1en); /* Restore PL1 limit */
+
__uc_sanitize(uc);
 
if (!ret) {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index ee63a8fd88fc..2bbca75ac477 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int 
chan, long val)
}
 }
 
+void hwm_power_max_disable(struct intel_gt *gt, u32 *old)
+{
+   struct i915_hwmon *hwmon = gt->i915->hwmon;
+   intel_wakeref_t wakeref;
+   u32 r;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   /* Take mutex to prevent concurrent hwm_power_max_write */
+   mutex_lock(&hwmon->hwmon_lock);
+
+   with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
+   r = intel_uncore_rmw(hwmon->ddat.uncore,
+hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN, 0);
+
+   *old = !!(r & PKG_PWR_LIM_1_EN);
+
+   /* hwmon_lock mutex is unlocked in hwm_power_max_restore */
+}
+
+void hwm_power_max_restore(struct intel_gt *gt, u32 old)
+{
+   struct i915_hwmon *hwmon = gt->i915->hwmon;
+   intel_wakeref_t wakeref;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
+   intel_uncore_rmw(hwmon->ddat.uncore,
+hwmon->rg.pkg_rapl_limit,
+PKG_PWR_LIM_1_EN,
+old ? PKG_PWR_LIM_1_EN : 0);
+
+   mutex_unlock(&hwmon->hwmon_lock);
+}
+
 static umode_t
 hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
 {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.h 
b/drivers/gpu/drm/i915/i915_hwmon.h
index 7ca9cf2c34c9..0c2db11be2e2 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.h
+++ b/drivers/gpu/drm/i915/i915_hwmon.h
@@ -7,14 +7,21 @@
 #ifndef __I915_HWMON_H__
 #define __I915_HWMON_H__
 
+#include <linux/types.h>
+
 struct drm_i915_private;
+struct intel_gt;
 
 #if IS_REACHABLE(CONFIG_HWMON)
 void i915_hwmon_register(struct drm_i915_private *i915);
 void i915_hwmon_unregister(struct drm_i915_private *i915);
+void hwm_power_max_disable(struct intel_gt *gt, u32 *old);
+void hwm_power_max_restore(struct intel_

[PATCH] drm/i915/guc: Disable PL1 power limit when loading GuC firmware

2023-03-10 Thread Ashutosh Dixit
On dGfx, the PL1 power limit being enabled and set to a low value results
in a low GPU operating freq. It also negates the freq raise operation which
is done before GuC firmware load. As a result GuC firmware load can time
out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
limit was enabled and set to a low value). Therefore, where possible, disable
the PL1 power limit while loading GuC firmware.

Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 ++-
 drivers/gpu/drm/i915/i915_hwmon.c | 34 +--
 drivers/gpu/drm/i915/i915_hwmon.h |  7 ++
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 1b7ecd384a79..8794d54500d7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -18,6 +18,7 @@
 #include "intel_uc.h"
 
 #include "i915_drv.h"
+#include "i915_hwmon.h"
 
 static const struct intel_uc_ops uc_ops_off;
 static const struct intel_uc_ops uc_ops_on;
@@ -460,7 +461,7 @@ static int __uc_init_hw(struct intel_uc *uc)
struct drm_i915_private *i915 = gt->i915;
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
-   int ret, attempts;
+   int ret, attempts, pl1en;
 
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
@@ -491,6 +492,9 @@ static int __uc_init_hw(struct intel_uc *uc)
else
attempts = 1;
 
+   /* Disable PL1 limit before raising freq when possible */
+   hwm_power_max_disable(gt, &pl1en);
+
intel_rps_raise_unslice(_to_gt(uc)->rps);
 
while (attempts--) {
@@ -544,6 +548,9 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_rps_lower_unslice(_to_gt(uc)->rps);
}
 
+   /* Restore PL1 limit */
+   hwm_power_max_restore(gt, pl1en);
+
guc_info(guc, "submission %s\n", 
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
guc_info(guc, "SLPC %s\n", 
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
 
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index ee63a8fd88fc..4ce3da7b7adc 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -62,20 +62,23 @@ struct i915_hwmon {
int scl_shift_time;
 };
 
-static void
+static u32
 hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
i915_reg_t reg, u32 clear, u32 set)
 {
struct i915_hwmon *hwmon = ddat->hwmon;
struct intel_uncore *uncore = ddat->uncore;
intel_wakeref_t wakeref;
+   u32 old;
 
mutex_lock(&hwmon->hwmon_lock);
 
with_intel_runtime_pm(uncore->rpm, wakeref)
-   intel_uncore_rmw(uncore, reg, clear, set);
+   old = intel_uncore_rmw(uncore, reg, clear, set);
 
mutex_unlock(&hwmon->hwmon_lock);
+
+   return old;
 }
 
 /*
@@ -444,6 +447,33 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int 
chan, long val)
}
 }
 
+void hwm_power_max_disable(struct intel_gt *gt, u32 *old)
+{
+   struct i915_hwmon *hwmon = gt->i915->hwmon;
+   u32 r;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   r = hwm_locked_with_pm_intel_uncore_rmw(&hwmon->ddat,
+   hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1_EN, 0);
+   *old = !!(r & PKG_PWR_LIM_1_EN);
+}
+
+void hwm_power_max_restore(struct intel_gt *gt, u32 old)
+{
+   struct i915_hwmon *hwmon = gt->i915->hwmon;
+
+   if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   return;
+
+   hwm_locked_with_pm_intel_uncore_rmw(&hwmon->ddat,
+   hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1_EN,
+   old ? PKG_PWR_LIM_1_EN : 0);
+}
+
 static umode_t
 hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
 {
diff --git a/drivers/gpu/drm/i915/i915_hwmon.h 
b/drivers/gpu/drm/i915/i915_hwmon.h
index 7ca9cf2c34c9..0c2db11be2e2 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.h
+++ b/drivers/gpu/drm/i915/i915_hwmon.h
@@ -7,14 +7,21 @@
 #ifndef __I915_HWMON_H__
 #define __I915_HWMON_H__
 
+#include <linux/types.h>
+
 struct drm_i915_private;
+struct intel_gt;
 
 #if IS_REACHABLE(CONFIG_HWMON)
 void i915_hwmon_register(struct drm_i915_private *i915);
 void i915_hwmon_unregister(struct drm_i915_private *i915);
+void hwm_power_max_disable(struct intel_gt *gt, u32 *old);
+void hwm_power_max_restore(struct intel_gt *gt, u32 old);
 #else
 static inline void i915_hwmon_register(struct drm_i915_private *i9

[PATCH 2/2] drm/i915/pmu: Remove fallback to requested freq for SLPC

2023-03-09 Thread Ashutosh Dixit
The fallback to requested freq does not work for SLPC because SLPC does not
use 'struct intel_rps'. Also, for SLPC the requested freq can only be obtained
from a hw register after acquiring forcewake, which we don't want to do for
PMU. Therefore remove the fallback to requested freq for SLPC. The actual freq
will be 0 when gt is in RC6, which is correct. This case is also rare, since
PMU freq sampling happens only when gt is unparked.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 7ece883a7d95..f697fabed64a 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -393,7 +393,14 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
 * frequency. Fortunately, the read should rarely fail!
 */
val = intel_rps_read_actual_frequency_fw(rps);
-   if (!val)
+
+   /*
+* SLPC does not use 'struct intel_rps'. Also for SLPC
+* requested freq can only be obtained after acquiring
+* forcewake and reading a hw register. For SLPC just
+* let val be 0
+*/
+   if (!val && !intel_uc_uses_guc_slpc(&gt->uc))
val = intel_gpu_freq(rps, rps->cur_freq);
 
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
-- 
2.38.0



[PATCH 1/2] drm/i915/pmu: Use functions common with sysfs to read actual freq

2023-03-09 Thread Ashutosh Dixit
Expose intel_rps_read_actual_frequency_fw to read the actual freq without
taking forcewake for use by PMU. The code is refactored to use a common set
of functions across sysfs and PMU. Using common functions with sysfs in PMU
solves the issues of missing support for MTL and missing support for older
generations (prior to Gen6). It also future proofs the PMU where sometimes
code has been updated for sysfs and PMU has been missed.

v2: Remove runtime_pm_if_in_use from read_actual_frequency_fw (Tvrtko)

Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8280
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 34 -
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 +-
 drivers/gpu/drm/i915/i915_pmu.c | 10 -
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 4d0dc9de23f9..9d9ac35691fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
 }
 
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
-{
-   struct drm_i915_private *i915 = rps_to_i915(rps);
-   i915_reg_t rpstat;
-
-   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
-
-   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
-}
-
 u32 intel_rps_read_rpstat(struct intel_rps *rps)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -2089,10 +2079,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 
rpstat)
return cagf;
 }
 
-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+   i915_reg_t r = INVALID_MMIO_REG;
u32 freq;
 
/*
@@ -2100,22 +2091,30 @@ static u32 read_cagf(struct intel_rps *rps)
 * registers will return 0 freq when GT is in RC6
 */
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
-   freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+   r = MTL_MIRROR_TARGET_WP1;
} else if (GRAPHICS_VER(i915) >= 12) {
-   freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+   r = GEN12_RPSTAT1;
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
+   goto exit;
} else if (GRAPHICS_VER(i915) >= 6) {
-   freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+   r = GEN6_RPSTAT1;
} else {
-   freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+   r = MEMSTAT_ILK;
}
 
+   freq = take_fw ? intel_uncore_read(uncore, r) : 
intel_uncore_read_fw(uncore, r);
+exit:
return intel_rps_get_cagf(rps, freq);
 }
 
+static u32 read_cagf(struct intel_rps *rps)
+{
+   return __read_cagf(rps, true);
+}
+
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
 {
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2128,6 +2127,11 @@ u32 intel_rps_read_actual_frequency(struct intel_rps 
*rps)
return freq;
 }
 
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+   return intel_gpu_freq(rps, __read_cagf(rps, false));
+}
+
 u32 intel_rps_read_punit_req(struct intel_rps *rps)
 {
struct intel_uncore *uncore = rps_to_uncore(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index c622962c6bef..2d5b3ef58606 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -39,6 +39,7 @@ int intel_gpu_freq(struct intel_rps *rps, int val);
 int intel_freq_opcode(struct intel_rps *rps, int val);
 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
 u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
 u32 intel_rps_get_min_frequency(struct intel_rps *rps);
 u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
@@ -52,7 +53,6 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
 u32 intel_rps_read_punit_req(struct intel_rps *rps);
 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
 u32 intel_rps_read_rpstat(struct intel_rps *rps);
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
 void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps 
*caps);
 void intel_rps_raise_unslice(struct intel_rps *rps);
 void intel_rps_lower_unslice(struct intel_rps *rps);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/dri

[PATCH 0/2] drm/i915/pmu: Use common freq functions with sysfs

2023-03-09 Thread Ashutosh Dixit
Expose intel_rps_read_actual_frequency_fw to read the actual freq without
taking forcewake for use by PMU. The code is refactored to use a common set
of functions across sysfs and PMU. Using common functions with sysfs in PMU
solves the issues of missing support for MTL and missing support for older
generations (prior to Gen6). It also future proofs the PMU where sometimes
code has been updated for sysfs and PMU has been missed.

Ashutosh Dixit (2):
  drm/i915/pmu: Use functions common with sysfs to read actual freq
  drm/i915/pmu: Remove fallback to requested freq for SLPC

 drivers/gpu/drm/i915/gt/intel_rps.c | 34 -
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 +-
 drivers/gpu/drm/i915/i915_pmu.c | 17 ++-
 3 files changed, 31 insertions(+), 22 deletions(-)

-- 
2.38.0



[PATCH 1/2] drm/i915/pmu: Use functions common with sysfs to read actual freq

2023-03-08 Thread Ashutosh Dixit
Expose intel_rps_read_actual_frequency_fw to read the actual freq without
taking forcewake for use by PMU. The code is refactored to use a common set
of functions across sysfs and PMU. Using common functions with sysfs in PMU
solves the issues of missing support for MTL and missing support for older
generations (prior to Gen6). It also future proofs the PMU where sometimes
code has been updated for sysfs and PMU has been missed.

Fixes: 22009b6dad66 ("drm/i915/mtl: Modify CAGF functions for MTL")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8280
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 46 +++--
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 +-
 drivers/gpu/drm/i915/i915_pmu.c | 10 +++
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 4d0dc9de23f9..3957c5ee5cba 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
 }
 
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
-{
-   struct drm_i915_private *i915 = rps_to_i915(rps);
-   i915_reg_t rpstat;
-
-   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
-
-   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
-}
-
 u32 intel_rps_read_rpstat(struct intel_rps *rps)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -2089,10 +2079,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 
rpstat)
return cagf;
 }
 
-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+   i915_reg_t r = INVALID_MMIO_REG;
u32 freq;
 
/*
@@ -2100,22 +2091,30 @@ static u32 read_cagf(struct intel_rps *rps)
 * registers will return 0 freq when GT is in RC6
 */
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
-   freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+   r = MTL_MIRROR_TARGET_WP1;
} else if (GRAPHICS_VER(i915) >= 12) {
-   freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+   r = GEN12_RPSTAT1;
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
+   goto exit;
} else if (GRAPHICS_VER(i915) >= 6) {
-   freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+   r = GEN6_RPSTAT1;
} else {
-   freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+   r = MEMSTAT_ILK;
}
 
+   freq = take_fw ? intel_uncore_read(uncore, r) : 
intel_uncore_read_fw(uncore, r);
+exit:
return intel_rps_get_cagf(rps, freq);
 }
 
+static u32 read_cagf(struct intel_rps *rps)
+{
+   return __read_cagf(rps, true);
+}
+
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
 {
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2128,6 +2127,23 @@ u32 intel_rps_read_actual_frequency(struct intel_rps 
*rps)
return freq;
 }
 
+static u32 read_cagf_fw(struct intel_rps *rps)
+{
+   return __read_cagf(rps, false);
+}
+
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+   struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
+   intel_wakeref_t wakeref;
+   u32 freq = 0;
+
+   with_intel_runtime_pm_if_in_use(rpm, wakeref)
+   freq = intel_gpu_freq(rps, read_cagf_fw(rps));
+
+   return freq;
+}
+
 u32 intel_rps_read_punit_req(struct intel_rps *rps)
 {
struct intel_uncore *uncore = rps_to_uncore(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index c622962c6bef..2d5b3ef58606 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -39,6 +39,7 @@ int intel_gpu_freq(struct intel_rps *rps, int val);
 int intel_freq_opcode(struct intel_rps *rps, int val);
 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
 u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
 u32 intel_rps_get_min_frequency(struct intel_rps *rps);
 u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
@@ -52,7 +53,6 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
 u32 intel_rps_read_punit_req(struct intel_rps *rps);
 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
 u32 intel_rps_read_rpstat(struct intel_rps *rps);
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
 void gen6_rps_get_

[PATCH 0/2] drm/i915/pmu: Use common freq functions with sysfs

2023-03-08 Thread Ashutosh Dixit
Expose intel_rps_read_actual_frequency_fw to read the actual freq without
taking forcewake for use by PMU. The code is refactored to use a common set
of functions across sysfs and PMU. Using common functions with sysfs in PMU
solves the issues of missing support for MTL and missing support for older
generations (prior to Gen6). It also future proofs the PMU where sometimes
code has been updated for sysfs and PMU has been missed.

Ashutosh Dixit (2):
  drm/i915/pmu: Use functions common with sysfs to read actual freq
  drm/i915/pmu: Remove fallback to requested freq for SLPC

 drivers/gpu/drm/i915/gt/intel_rps.c | 46 +++--
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 +-
 drivers/gpu/drm/i915/i915_pmu.c | 17 +++
 3 files changed, 43 insertions(+), 22 deletions(-)

-- 
2.38.0



[PATCH 2/2] drm/i915/pmu: Remove fallback to requested freq for SLPC

2023-03-08 Thread Ashutosh Dixit
The fallback to requested freq does not work for SLPC because SLPC does not
use 'struct intel_rps'. Also, for SLPC the requested freq can only be obtained
from a hw register after acquiring forcewake, which we don't want to do for
PMU. Therefore remove the fallback to requested freq for SLPC. The actual freq
will be 0 when gt is in RC6, which is correct. This case is also rare, since
PMU freq sampling happens only when gt is unparked.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 7ece883a7d95..f697fabed64a 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -393,7 +393,14 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
 * frequency. Fortunately, the read should rarely fail!
 */
val = intel_rps_read_actual_frequency_fw(rps);
-   if (!val)
+
+   /*
+* SLPC does not use 'struct intel_rps'. Also for SLPC
+* requested freq can only be obtained after acquiring
+* forcewake and reading a hw register. For SLPC just
+* let val be 0
+*/
+   if (!val && !intel_uc_uses_guc_slpc(&gt->uc))
val = intel_gpu_freq(rps, rps->cur_freq);
 
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
-- 
2.38.0



[PATCH 0/3] drm/i915/pmu: Use common freq functions with sysfs

2023-03-07 Thread Ashutosh Dixit
Using common freq functions with sysfs in PMU (but without taking
forcewake) solves the following issues (a) missing support for MTL (b)
missing support for older generations (prior to Gen6) (c) missing support
for slpc when freq sampling has to fall back to requested freq. It also
makes the PMU code future proof where sometimes code has been updated for
sysfs and PMU has been missed.

Ashutosh Dixit (3):
  drm/i915/rps: Expose read_actual_frequency_fw for PMU
  drm/i915/rps: Expose get_requested_frequency_fw for PMU
  drm/i915/pmu: Use common freq functions with sysfs

 drivers/gpu/drm/i915/gt/intel_rps.c | 68 +
 drivers/gpu/drm/i915/gt/intel_rps.h |  4 +-
 drivers/gpu/drm/i915/i915_pmu.c | 10 ++---
 3 files changed, 56 insertions(+), 26 deletions(-)

-- 
2.38.0



[PATCH 2/3] drm/i915/rps: Expose get_requested_frequency_fw for PMU

2023-03-07 Thread Ashutosh Dixit
Expose intel_rps_get_requested_frequency_fw to read the requested freq
without taking forcewake. This is done for use by PMU which does not take
forcewake when reading freq. The code is refactored to use a common set of
functions across sysfs and PMU. It also allows PMU to support both host
turbo (rps) and slpc which was previously missed due to the non-use of
common functions across sysfs and PMU.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 22 +++---
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 +-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 0a8e24bcb874..49df31927c0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2154,7 +2154,7 @@ u32 intel_rps_read_actual_frequency_fw(struct intel_rps 
*rps)
return freq;
 }
 
-u32 intel_rps_read_punit_req(struct intel_rps *rps)
+static u32 intel_rps_read_punit_req(struct intel_rps *rps, bool take_fw)
 {
struct intel_uncore *uncore = rps_to_uncore(rps);
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2162,7 +2162,8 @@ u32 intel_rps_read_punit_req(struct intel_rps *rps)
u32 freq = 0;
 
with_intel_runtime_pm_if_in_use(rpm, wakeref)
-   freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+   freq = take_fw ? intel_uncore_read(uncore, GEN6_RPNSWREQ) :
+   intel_uncore_read_fw(uncore, GEN6_RPNSWREQ);
 
return freq;
 }
@@ -2176,7 +2177,7 @@ static u32 intel_rps_get_req(u32 pureq)
 
 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
 {
-   u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));
+   u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps, true));
 
return intel_gpu_freq(rps, freq);
 }
@@ -2189,6 +2190,21 @@ u32 intel_rps_get_requested_frequency(struct intel_rps 
*rps)
return intel_gpu_freq(rps, rps->cur_freq);
 }
 
+static u32 intel_rps_read_punit_req_frequency_fw(struct intel_rps *rps)
+{
+   u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps, false));
+
+   return intel_gpu_freq(rps, freq);
+}
+
+u32 intel_rps_get_requested_frequency_fw(struct intel_rps *rps)
+{
+   if (rps_uses_slpc(rps))
+   return intel_rps_read_punit_req_frequency_fw(rps);
+   else
+   return intel_gpu_freq(rps, rps->cur_freq);
+}
+
 u32 intel_rps_get_max_frequency(struct intel_rps *rps)
 {
struct intel_guc_slpc *slpc = rps_to_slpc(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index 63511b826a97..a990f985ab23 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -41,6 +41,7 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
 u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
 u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
+u32 intel_rps_get_requested_frequency_fw(struct intel_rps *rps);
 u32 intel_rps_get_min_frequency(struct intel_rps *rps);
 u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
 int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val);
@@ -50,7 +51,6 @@ int intel_rps_set_max_frequency(struct intel_rps *rps, u32 
val);
 u32 intel_rps_get_rp0_frequency(struct intel_rps *rps);
 u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
 u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
-u32 intel_rps_read_punit_req(struct intel_rps *rps);
 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
 u32 intel_rps_read_rpstat(struct intel_rps *rps);
 u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
-- 
2.38.0



[PATCH 3/3] drm/i915/pmu: Use common freq functions with sysfs

2023-03-07 Thread Ashutosh Dixit
Using common freq functions with sysfs in PMU (but without taking
forcewake) solves the following issues (a) missing support for MTL (b)
missing support for older generation (prior to Gen6) (c) missing support
for slpc when freq sampling has to fall back to requested freq. It also
makes the PMU code future proof where sometimes code has been updated for
sysfs and PMU has been missed.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 10 --
 drivers/gpu/drm/i915/gt/intel_rps.h |  1 -
 drivers/gpu/drm/i915/i915_pmu.c | 10 --
 3 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 49df31927c0e..b03bfbe7ee23 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2046,16 +2046,6 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
 }
 
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
-{
-   struct drm_i915_private *i915 = rps_to_i915(rps);
-   i915_reg_t rpstat;
-
-   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
-
-   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
-}
-
 u32 intel_rps_read_rpstat(struct intel_rps *rps)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index a990f985ab23..60ae27679011 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -53,7 +53,6 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
 u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps);
 u32 intel_rps_read_rpstat(struct intel_rps *rps);
-u32 intel_rps_read_rpstat_fw(struct intel_rps *rps);
 void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps 
*caps);
 void intel_rps_raise_unslice(struct intel_rps *rps);
 void intel_rps_lower_unslice(struct intel_rps *rps);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a76c5ce9513d..1a4c9fed257c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -392,14 +392,12 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
 * case we assume the system is running at the intended
 * frequency. Fortunately, the read should rarely fail!
 */
-   val = intel_rps_read_rpstat_fw(rps);
-   if (val)
-   val = intel_rps_get_cagf(rps, val);
-   else
-   val = rps->cur_freq;
+   val = intel_rps_read_actual_frequency_fw(rps);
+   if (!val)
+   val = intel_rps_get_requested_frequency_fw(rps),
 
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
-   intel_gpu_freq(rps, val), period_ns / 1000);
+   val, period_ns / 1000);
}
 
if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) {
-- 
2.38.0



[PATCH 1/3] drm/i915/rps: Expose read_actual_frequency_fw for PMU

2023-03-07 Thread Ashutosh Dixit
Expose intel_rps_read_actual_frequency_fw to read the actual/granted freq
without taking forcewake. This is done for use by PMU which does not take
forcewake when reading freq. The code is refactored to use a common set of
functions across sysfs and PMU. It also allows PMU to support MTL as well
as older generations (before Gen6) which were previously missed due to the
non-use of common functions across sysfs and PMU.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 36 +
 drivers/gpu/drm/i915/gt/intel_rps.h |  1 +
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 4d0dc9de23f9..0a8e24bcb874 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2089,10 +2089,11 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 
rpstat)
return cagf;
 }
 
-static u32 read_cagf(struct intel_rps *rps)
+static u32 __read_cagf(struct intel_rps *rps, bool take_fw)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
struct intel_uncore *uncore = rps_to_uncore(rps);
+   i915_reg_t r = INVALID_MMIO_REG;
u32 freq;
 
/*
@@ -2100,22 +2101,30 @@ static u32 read_cagf(struct intel_rps *rps)
 * registers will return 0 freq when GT is in RC6
 */
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
-   freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+   r = MTL_MIRROR_TARGET_WP1;
} else if (GRAPHICS_VER(i915) >= 12) {
-   freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+   r = GEN12_RPSTAT1;
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
+   goto exit;
} else if (GRAPHICS_VER(i915) >= 6) {
-   freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
+   r = GEN6_RPSTAT1;
} else {
-   freq = intel_uncore_read(uncore, MEMSTAT_ILK);
+   r = MEMSTAT_ILK;
}
 
+   freq = take_fw ? intel_uncore_read(uncore, r) : 
intel_uncore_read_fw(uncore, r);
+exit:
return intel_rps_get_cagf(rps, freq);
 }
 
+static u32 read_cagf(struct intel_rps *rps)
+{
+   return __read_cagf(rps, true);
+}
+
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
 {
struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
@@ -2128,6 +2137,23 @@ u32 intel_rps_read_actual_frequency(struct intel_rps 
*rps)
return freq;
 }
 
+static u32 read_cagf_fw(struct intel_rps *rps)
+{
+   return __read_cagf(rps, false);
+}
+
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps)
+{
+   struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
+   intel_wakeref_t wakeref;
+   u32 freq = 0;
+
+   with_intel_runtime_pm_if_in_use(rpm, wakeref)
+   freq = intel_gpu_freq(rps, read_cagf_fw(rps));
+
+   return freq;
+}
+
 u32 intel_rps_read_punit_req(struct intel_rps *rps)
 {
struct intel_uncore *uncore = rps_to_uncore(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index c622962c6bef..63511b826a97 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -39,6 +39,7 @@ int intel_gpu_freq(struct intel_rps *rps, int val);
 int intel_freq_opcode(struct intel_rps *rps, int val);
 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
 u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+u32 intel_rps_read_actual_frequency_fw(struct intel_rps *rps);
 u32 intel_rps_get_requested_frequency(struct intel_rps *rps);
 u32 intel_rps_get_min_frequency(struct intel_rps *rps);
 u32 intel_rps_get_min_raw_freq(struct intel_rps *rps);
-- 
2.38.0



[PATCH 2/2] drm/i915/pmu: Use correct requested freq for SLPC

2023-03-03 Thread Ashutosh Dixit
SLPC does not use 'struct intel_rps'. Use UNSLICE_RATIO bits from
GEN6_RPNSWREQ for SLPC. See intel_rps_get_requested_frequency.

Bspec: 52745

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index f0a1e36915b8..5ee836610801 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -394,8 +394,13 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
 * frequency. Fortunately, the read should rarely fail!
 */
val = intel_rps_get_cagf(rps, intel_rps_read_rpstat_fw(rps));
-   if (!val)
-   val = rps->cur_freq;
+   if (!val) {
+   if (intel_uc_uses_guc_slpc(>uc))
+   val = intel_rps_read_punit_req(rps) >>
+   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
+   else
+   val = rps->cur_freq;
+   }
 
add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT],
intel_gpu_freq(rps, val), period_ns / 1000);
-- 
2.38.0



[PATCH 0/2] drm/i915/pmu: Freq sampling: Fix requested freq fallback

2023-03-03 Thread Ashutosh Dixit
A couple of minor fixes to the PMU requested freq fallback for PMU freq
sampling.

Ashutosh Dixit (2):
  drm/i915/pmu: Use only freq bits for falling back to requested freq
  drm/i915/pmu: Use correct requested freq for SLPC

 drivers/gpu/drm/i915/i915_pmu.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

-- 
2.38.0



[PATCH 1/2] drm/i915/pmu: Use only freq bits for falling back to requested freq

2023-03-03 Thread Ashutosh Dixit
On newer generations, the GEN12_RPSTAT1 register contains more than freq
information, e.g. see GEN12_VOLTAGE_MASK. Therefore use only the freq bits
to decide whether to fall back to requested freq.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_pmu.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 52531ab28c5f..f0a1e36915b8 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -393,10 +393,8 @@ frequency_sample(struct intel_gt *gt, unsigned int 
period_ns)
 * case we assume the system is running at the intended
 * frequency. Fortunately, the read should rarely fail!
 */
-   val = intel_rps_read_rpstat_fw(rps);
-   if (val)
-   val = intel_rps_get_cagf(rps, val);
-   else
+   val = intel_rps_get_cagf(rps, intel_rps_read_rpstat_fw(rps));
+   if (!val)
val = rps->cur_freq;
 
add_sample_mult(>sample[__I915_SAMPLE_FREQ_ACT],
-- 
2.38.0



[PATCH] drm/i915/hwmon: Accept writes of value 0 to power1_max_interval

2023-02-27 Thread Ashutosh Dixit
The value shown by power1_max_interval in millisec is essentially:
((1.x * power(2,y)) * 1000) >> 10
Where x and y are read from a HW register. On ATSM, x and y are 0 on
power-up so the value shown is 0.

Writes of 0 to power1_max_interval had previously been disallowed to avoid
computing ilog2(0), but this resulted in the corner-case bug referenced
below. Therefore allow writes of 0 now, special-casing such a write by
setting x = y = 0.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7754
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 7c20a6f47b92e..596dd2c070106 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -218,11 +218,15 @@ hwm_power1_max_interval_store(struct device *dev,
/* val in hw units */
val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
/* Convert to 1.x * power(2,y) */
-   if (!val)
-   return -EINVAL;
-   y = ilog2(val);
-   /* x = (val - (1 << y)) >> (y - 2); */
-   x = (val - (1ul << y)) << x_w >> y;
+   if (!val) {
+   /* Avoid ilog2(0) */
+   y = 0;
+   x = 0;
+   } else {
+   y = ilog2(val);
+   /* x = (val - (1 << y)) >> (y - 2); */
+   x = (val - (1ul << y)) << x_w >> y;
+   }
 
rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | 
REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
 
-- 
2.38.0



[PATCH 2/2] drm/i915/hwmon: Enable PL1 limit when writing limit value to HW

2023-02-16 Thread Ashutosh Dixit
Previous documentation suggested that the PL1 power limit is always enabled
in HW. However we now find this not to be the case on some platforms (such
as ATSM). Therefore enable the PL1 power limit (by setting the enable bit)
when writing the PL1 limit value to HW.

Bspec: 51864

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 85195d61f89c7..7c20a6f47b92e 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -385,10 +385,11 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 
/* Computation in 64-bits to avoid overflow. Round to nearest. */
nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
+   nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
 
hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1,
-   REG_FIELD_PREP(PKG_PWR_LIM_1, 
nval));
+   PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1,
+   nval);
return 0;
 }
 
-- 
2.38.0



[PATCH 1/2] drm/i915/hwmon: Replace hwm_field_scale_and_write with hwm_power_max_write

2023-02-16 Thread Ashutosh Dixit
hwm_field_scale_and_write has a single caller hwm_power_write and is
specific to hwm_power_write but makes it appear that it is a general
function which can have multiple callers. Replace the function with
hwm_power_max_write which is specific to hwm_power_write and use that in
future patches where the function needs to be extended.

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 36 ++-
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 1225bc432f0d5..85195d61f89c7 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -99,20 +99,6 @@ hwm_field_read_and_scale(struct hwm_drvdata *ddat, 
i915_reg_t rgadr,
return mul_u64_u32_shr(reg_value, scale_factor, nshift);
 }
 
-static void
-hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr,
- int nshift, unsigned int scale_factor, long lval)
-{
-   u32 nval;
-
-   /* Computation in 64-bits to avoid overflow. Round to nearest. */
-   nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
-
-   hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr,
-   PKG_PWR_LIM_1,
-   REG_FIELD_PREP(PKG_PWR_LIM_1, 
nval));
-}
-
 /*
  * hwm_energy - Obtain energy value
  *
@@ -391,6 +377,21 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
return 0;
 }
 
+static int
+hwm_power_max_write(struct hwm_drvdata *ddat, long val)
+{
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   u32 nval;
+
+   /* Computation in 64-bits to avoid overflow. Round to nearest. */
+   nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
+
+   hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1,
+   REG_FIELD_PREP(PKG_PWR_LIM_1, 
nval));
+   return 0;
+}
+
 static int
 hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
 {
@@ -425,16 +426,11 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int 
chan, long *val)
 static int
 hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
 {
-   struct i915_hwmon *hwmon = ddat->hwmon;
u32 uval;
 
switch (attr) {
case hwmon_power_max:
-   hwm_field_scale_and_write(ddat,
- hwmon->rg.pkg_rapl_limit,
- hwmon->scl_shift_power,
- SF_POWER, val);
-   return 0;
+   return hwm_power_max_write(ddat, val);
case hwmon_power_crit:
uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, 
SF_POWER);
return hwm_pcode_write_i1(ddat->uncore->i915, uval);
-- 
2.38.0



[PATCH 0/2] PL1 power limit fixes for ATSM

2023-02-16 Thread Ashutosh Dixit
Previous PL1 power limit implementation assumed that the PL1 limit is
always enabled in HW. However we now find this not to be the case on ATSM
where the PL1 limit is disabled at power up. This requires changes in the
previous PL1 limit implementation.

v2: Dropping Patch 3 (since it is NAK'd by hwmon) so that the first two
patches can get merged. The first two patches are sufficient to fix the
main ATSM issue.

Ashutosh Dixit (2):
  drm/i915/hwmon: Replace hwm_field_scale_and_write with
hwm_power_max_write
  drm/i915/hwmon: Enable PL1 limit when writing limit value to HW

 drivers/gpu/drm/i915/i915_hwmon.c | 37 ++-
 1 file changed, 17 insertions(+), 20 deletions(-)

-- 
2.38.0



[PATCH 1/3] drm/i915/hwmon: Replace hwm_field_scale_and_write with hwm_power_max_write

2023-02-13 Thread Ashutosh Dixit
hwm_field_scale_and_write has a single caller hwm_power_write and is
specific to hwm_power_write but makes it appear that it is a general
function which can have multiple callers. Replace the function with
hwm_power_max_write which is specific to hwm_power_write and use that in
future patches where the function needs to be extended.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 36 ++-
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 1225bc432f0d5..85195d61f89c7 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -99,20 +99,6 @@ hwm_field_read_and_scale(struct hwm_drvdata *ddat, 
i915_reg_t rgadr,
return mul_u64_u32_shr(reg_value, scale_factor, nshift);
 }
 
-static void
-hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr,
- int nshift, unsigned int scale_factor, long lval)
-{
-   u32 nval;
-
-   /* Computation in 64-bits to avoid overflow. Round to nearest. */
-   nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
-
-   hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr,
-   PKG_PWR_LIM_1,
-   REG_FIELD_PREP(PKG_PWR_LIM_1, 
nval));
-}
-
 /*
  * hwm_energy - Obtain energy value
  *
@@ -391,6 +377,21 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
return 0;
 }
 
+static int
+hwm_power_max_write(struct hwm_drvdata *ddat, long val)
+{
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   u32 nval;
+
+   /* Computation in 64-bits to avoid overflow. Round to nearest. */
+   nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
+
+   hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1,
+   REG_FIELD_PREP(PKG_PWR_LIM_1, 
nval));
+   return 0;
+}
+
 static int
 hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
 {
@@ -425,16 +426,11 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int 
chan, long *val)
 static int
 hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
 {
-   struct i915_hwmon *hwmon = ddat->hwmon;
u32 uval;
 
switch (attr) {
case hwmon_power_max:
-   hwm_field_scale_and_write(ddat,
- hwmon->rg.pkg_rapl_limit,
- hwmon->scl_shift_power,
- SF_POWER, val);
-   return 0;
+   return hwm_power_max_write(ddat, val);
case hwmon_power_crit:
uval = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, 
SF_POWER);
return hwm_pcode_write_i1(ddat->uncore->i915, uval);
-- 
2.38.0



[PATCH 2/3] drm/i915/hwmon: Enable PL1 limit when writing limit value to HW

2023-02-13 Thread Ashutosh Dixit
Previous documentation suggested that the PL1 power limit is always enabled
in HW. However we now find this not to be the case on some platforms (such
as ATSM). Therefore enable the PL1 power limit (by setting the enable bit)
when writing the PL1 limit value to HW.

Bspec: 51864

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 85195d61f89c7..7c20a6f47b92e 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -385,10 +385,11 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
 
/* Computation in 64-bits to avoid overflow. Round to nearest. */
nval = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_power, 
SF_POWER);
+   nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
 
hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1,
-   REG_FIELD_PREP(PKG_PWR_LIM_1, 
nval));
+   PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1,
+   nval);
return 0;
 }
 
-- 
2.38.0



[PATCH 3/3] drm/i915/hwmon: Expose power1_max_enable

2023-02-13 Thread Ashutosh Dixit
On ATSM the PL1 power limit is disabled at power up. The previous uapi
assumed that the PL1 limit is always enabled and therefore did not have a
notion of a disabled PL1 limit. This results in erroneous PL1 limit values
when PL1 limit is disabled. For example at power up, the disabled ATSM PL1
limit is shown as 0 which means a low PL1 limit whereas the limit being
disabled actually implies a high effective PL1 limit value.

To get round this problem, expose power1_max_enable as a custom hwmon
attribute. power1_max_enable can be used in conjunction with power1_max to
interpret power1_max (PL1 limit) values correctly. It can also be used to
enable/disable the PL1 power limit.

Signed-off-by: Ashutosh Dixit 
---
 .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 +++
 drivers/gpu/drm/i915/i915_hwmon.c | 48 +--
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index 2d6a472eef885..edd94a44b4570 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -18,6 +18,13 @@ Description: RW. Card reactive sustained  (PL1/Tau) power 
limit in microwatts.
 
Only supported for particular Intel i915 graphics platforms.
 
+What:  /sys/devices/.../hwmon/hwmon/power1_max_enable
+Date:  May 2023
+KernelVersion: 6.3
+Contact:   intel-...@lists.freedesktop.org
+Description:   RW. Enable/disable the PL1 power limit (power1_max).
+
+   Only supported for particular Intel i915 graphics platforms.
 What:  /sys/devices/.../hwmon/hwmon/power1_rated_max
 Date:  February 2023
 KernelVersion: 6.2
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 7c20a6f47b92e..5665869d8602b 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -230,13 +230,52 @@ hwm_power1_max_interval_store(struct device *dev,
PKG_PWR_LIM_1_TIME, rxy);
return count;
 }
+static SENSOR_DEVICE_ATTR_RW(power1_max_interval, hwm_power1_max_interval, 0);
 
-static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
- hwm_power1_max_interval_show,
- hwm_power1_max_interval_store, 0);
+static ssize_t
+hwm_power1_max_enable_show(struct device *dev, struct device_attribute *attr, 
char *buf)
+{
+   struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+   intel_wakeref_t wakeref;
+   u32 r;
+
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read(ddat->uncore, 
ddat->hwmon->rg.pkg_rapl_limit);
+
+   return sysfs_emit(buf, "%u\n", !!(r & PKG_PWR_LIM_1_EN));
+}
+
+static ssize_t
+hwm_power1_max_enable_store(struct device *dev, struct device_attribute *attr,
+   const char *buf, size_t count)
+{
+   struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+   intel_wakeref_t wakeref;
+   u32 en, r;
+   bool _en;
+   int ret;
+
+   ret = kstrtobool(buf, &_en);
+   if (ret)
+   return ret;
+
+   en = REG_FIELD_PREP(PKG_PWR_LIM_1_EN, _en);
+   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
ddat->hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1_EN, en);
+
+   /* Verify, because PL1 limit cannot be disabled on all platforms */
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read(ddat->uncore, 
ddat->hwmon->rg.pkg_rapl_limit);
+   if ((r & PKG_PWR_LIM_1_EN) != en)
+   return -EPERM;
+
+   return count;
+}
+static SENSOR_DEVICE_ATTR_RW(power1_max_enable, hwm_power1_max_enable, 0);
 
 static struct attribute *hwm_attributes[] = {
_dev_attr_power1_max_interval.dev_attr.attr,
+   _dev_attr_power1_max_enable.dev_attr.attr,
NULL
 };
 
@@ -247,7 +286,8 @@ static umode_t hwm_attributes_visible(struct kobject *kobj,
struct hwm_drvdata *ddat = dev_get_drvdata(dev);
struct i915_hwmon *hwmon = ddat->hwmon;
 
-   if (attr == _dev_attr_power1_max_interval.dev_attr.attr)
+   if (attr == _dev_attr_power1_max_interval.dev_attr.attr ||
+   attr == _dev_attr_power1_max_enable.dev_attr.attr)
return i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit) ? 
attr->mode : 0;
 
return 0;
-- 
2.38.0



[PATCH 0/3] PL1 power limit fixes for ATSM

2023-02-13 Thread Ashutosh Dixit
Previous PL1 power limit implementation assumed that the PL1 limit is
always enabled in HW. However we now find this not to be the case on ATSM
where the PL1 limit is disabled at power up. This requires changes in the
previous PL1 limit implementation.

Submitting 3 patches for easier review but patches can be squashed if
needed.

Ashutosh Dixit (3):
  drm/i915/hwmon: Replace hwm_field_scale_and_write with
hwm_power_max_write
  drm/i915/hwmon: Enable PL1 limit when writing limit value to HW
  drm/i915/hwmon: Expose power1_max_enable

 .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 ++
 drivers/gpu/drm/i915/i915_hwmon.c | 85 +--
 2 files changed, 68 insertions(+), 24 deletions(-)

-- 
2.38.0



[PATCH] Revert "drm/i915/hwmon: Enable PL1 power limit"

2023-02-08 Thread Ashutosh Dixit
This reverts commit 0349c41b05968befaffa5fbb7e73d0ee6004f610.

0349c41b0596 ("drm/i915/hwmon: Enable PL1 power limit") is incorrect and
caused a major regression on ATSM. The change enabled the PL1 power limit
but FW sets the default value of the PL1 limit to 0 which implies HW now
works at minimum power and therefore the lowest effective frequency. This
means all workloads now run slower resulting in even GuC FW load operations
timing out, rendering ATSM unusable.

A different solution to the original issue of the PL1 limit being disabled
on ATSM is needed, but until that is developed, revert 0349c41b0596.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 4683a5b96eff1..1225bc432f0d5 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -687,11 +687,6 @@ hwm_get_preregistration_info(struct drm_i915_private *i915)
for_each_gt(gt, i915, i)
hwm_energy(>ddat_gt[i], );
}
-
-   /* Enable PL1 power limit */
-   if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
-   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1_EN, 
PKG_PWR_LIM_1_EN);
 }
 
 void i915_hwmon_register(struct drm_i915_private *i915)
-- 
2.38.0



[PATCH] drm/i915/hwmon: Enable PL1 power limit

2023-02-03 Thread Ashutosh Dixit
Previous documentation suggested that PL1 power limit is always
enabled. However we now find this not to be the case on some
platforms (such as ATSM). Therefore enable PL1 power limit during hwmon
initialization.

Bspec: 51864

v2: Add Bspec reference (Gwan-gyeong)
v3: Add Fixes tag

Fixes: 99f55efb79114 ("drm/i915/hwmon: Power PL1 limit and TDP setting")
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Gwan-gyeong Mun 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 1225bc432f0d5..4683a5b96eff1 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private *i915)
for_each_gt(gt, i915, i)
hwm_energy(>ddat_gt[i], );
}
+
+   /* Enable PL1 power limit */
+   if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1_EN, 
PKG_PWR_LIM_1_EN);
 }
 
 void i915_hwmon_register(struct drm_i915_private *i915)
-- 
2.38.0



[PATCH] drm/i915/hwmon: Enable PL1 power limit

2023-02-02 Thread Ashutosh Dixit
Previous documentation suggested that PL1 power limit is always
enabled. However we now find this not to be the case on some
platforms (such as ATSM). Therefore enable PL1 power limit during hwmon
initialization.

Bspec: 51864

v2: Add Bspec reference (Gwan-gyeong)

Signed-off-by: Ashutosh Dixit 
Reviewed-by: Gwan-gyeong Mun 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 1225bc432f0d5..4683a5b96eff1 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private *i915)
for_each_gt(gt, i915, i)
hwm_energy(>ddat_gt[i], );
}
+
+   /* Enable PL1 power limit */
+   if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1_EN, 
PKG_PWR_LIM_1_EN);
 }
 
 void i915_hwmon_register(struct drm_i915_private *i915)
-- 
2.38.0



[PATCH] drm/i915/hwmon: Enable PL1 power limit

2023-02-01 Thread Ashutosh Dixit
Previous documentation suggested that PL1 power limit is always
enabled. However we now find this not to be the case on some
platforms (such as ATSM). Therefore enable PL1 power limit during hwmon
initialization.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 1225bc432f0d5..4683a5b96eff1 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private *i915)
for_each_gt(gt, i915, i)
hwm_energy(>ddat_gt[i], );
}
+
+   /* Enable PL1 power limit */
+   if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+   hwm_locked_with_pm_intel_uncore_rmw(ddat, 
hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1_EN, 
PKG_PWR_LIM_1_EN);
 }
 
 void i915_hwmon_register(struct drm_i915_private *i915)
-- 
2.38.0



[PATCH] drm/i915/hwmon: Display clamped PL1 limit

2022-12-15 Thread Ashutosh Dixit
HW allows arbitrary PL1 limits to be set but silently clamps these values
to "typical but not guaranteed" min/max values in pkg_power_sku
register. Follow the same pattern for sysfs: allow arbitrary PL1 limits to
be set but display clamped values when read, so that users see the PL1
limits HW is likely using. Otherwise users may think HW is using the
arbitrarily high/low PL1 limits they might have set. The previous write/read
I1 power1_crit limit also follows the same clamping pattern.

v2: Explain "why" in commit message and include bug link (Jani Nikula)

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7704
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c| 39 
 drivers/gpu/drm/i915/intel_mchbar_regs.h |  2 ++
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index cca7a4350ec8f..1225bc432f0d5 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -359,6 +359,38 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 
attr, int chan)
}
 }
 
+/*
+ * HW allows arbitrary PL1 limits to be set but silently clamps these values to
+ * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the
+ * same pattern for sysfs, allow arbitrary PL1 limits to be set but display
+ * clamped values when read. Write/read I1 also follows the same pattern.
+ */
+static int
+hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
+{
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   intel_wakeref_t wakeref;
+   u64 r, min, max;
+
+   *val = hwm_field_read_and_scale(ddat,
+   hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1,
+   hwmon->scl_shift_power,
+   SF_POWER);
+
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read64(ddat->uncore, hwmon->rg.pkg_power_sku);
+   min = REG_FIELD_GET(PKG_MIN_PWR, r);
+   min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
+   max = REG_FIELD_GET(PKG_MAX_PWR, r);
+   max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+
+   if (min && max)
+   *val = clamp_t(u64, *val, min, max);
+
+   return 0;
+}
+
 static int
 hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
 {
@@ -368,12 +400,7 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int 
chan, long *val)
 
switch (attr) {
case hwmon_power_max:
-   *val = hwm_field_read_and_scale(ddat,
-   hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1,
-   hwmon->scl_shift_power,
-   SF_POWER);
-   return 0;
+   return hwm_power_max_read(ddat, val);
case hwmon_power_rated_max:
*val = hwm_field_read_and_scale(ddat,
hwmon->rg.pkg_power_sku,
diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h 
b/drivers/gpu/drm/i915/intel_mchbar_regs.h
index f93e9af43ac35..73900c098d591 100644
--- a/drivers/gpu/drm/i915/intel_mchbar_regs.h
+++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h
@@ -194,6 +194,8 @@
  */
 #define PCU_PACKAGE_POWER_SKU  _MMIO(MCHBAR_MIRROR_BASE_SNB + 
0x5930)
 #define   PKG_PKG_TDP  GENMASK_ULL(14, 0)
+#define   PKG_MIN_PWR  GENMASK_ULL(30, 16)
+#define   PKG_MAX_PWR  GENMASK_ULL(46, 32)
 #define   PKG_MAX_WIN  GENMASK_ULL(54, 48)
 #define PKG_MAX_WIN_X  GENMASK_ULL(54, 53)
 #define PKG_MAX_WIN_Y  GENMASK_ULL(52, 48)
-- 
2.38.0



[PATCH] drm/i915/hwmon: Display clamped PL1 limit

2022-12-15 Thread Ashutosh Dixit
HW allows arbitrary PL1 limits to be set but silently clamps these values
to "typical but not guaranteed" min/max values in pkg_power_sku
register. Follow the same pattern for sysfs, allow arbitrary PL1 limits to
be set but display clamped values when read.

Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c| 39 
 drivers/gpu/drm/i915/intel_mchbar_regs.h |  2 ++
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index cca7a4350ec8f..1225bc432f0d5 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -359,6 +359,38 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 
attr, int chan)
}
 }
 
+/*
+ * HW allows arbitrary PL1 limits to be set but silently clamps these values to
+ * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow the
+ * same pattern for sysfs, allow arbitrary PL1 limits to be set but display
+ * clamped values when read. Write/read I1 also follows the same pattern.
+ */
+static int
+hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
+{
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   intel_wakeref_t wakeref;
+   u64 r, min, max;
+
+   *val = hwm_field_read_and_scale(ddat,
+   hwmon->rg.pkg_rapl_limit,
+   PKG_PWR_LIM_1,
+   hwmon->scl_shift_power,
+   SF_POWER);
+
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read64(ddat->uncore, hwmon->rg.pkg_power_sku);
+   min = REG_FIELD_GET(PKG_MIN_PWR, r);
+   min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
+   max = REG_FIELD_GET(PKG_MAX_PWR, r);
+   max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+
+   if (min && max)
+   *val = clamp_t(u64, *val, min, max);
+
+   return 0;
+}
+
 static int
 hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
 {
@@ -368,12 +400,7 @@ hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int 
chan, long *val)
 
switch (attr) {
case hwmon_power_max:
-   *val = hwm_field_read_and_scale(ddat,
-   hwmon->rg.pkg_rapl_limit,
-   PKG_PWR_LIM_1,
-   hwmon->scl_shift_power,
-   SF_POWER);
-   return 0;
+   return hwm_power_max_read(ddat, val);
case hwmon_power_rated_max:
*val = hwm_field_read_and_scale(ddat,
hwmon->rg.pkg_power_sku,
diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h 
b/drivers/gpu/drm/i915/intel_mchbar_regs.h
index f93e9af43ac35..73900c098d591 100644
--- a/drivers/gpu/drm/i915/intel_mchbar_regs.h
+++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h
@@ -194,6 +194,8 @@
  */
 #define PCU_PACKAGE_POWER_SKU  _MMIO(MCHBAR_MIRROR_BASE_SNB + 
0x5930)
 #define   PKG_PKG_TDP  GENMASK_ULL(14, 0)
+#define   PKG_MIN_PWR  GENMASK_ULL(30, 16)
+#define   PKG_MAX_PWR  GENMASK_ULL(46, 32)
 #define   PKG_MAX_WIN  GENMASK_ULL(54, 48)
 #define PKG_MAX_WIN_X  GENMASK_ULL(54, 53)
 #define PKG_MAX_WIN_Y  GENMASK_ULL(52, 48)
-- 
2.38.0



[PATCH] drm/i915/hwmon: Don't use FIELD_PREP

2022-10-31 Thread Ashutosh Dixit
FIELD_PREP and REG_FIELD_PREP have checks requiring a compile time constant
mask. When the mask comes in as the argument of a function these checks can
fail depending on the compiler (gcc vs clang), optimization level,
etc. Use a simpler version of FIELD_PREP which skips these checks. The
checks are not needed because the mask is formed using REG_GENMASK (so is
actually a compile time constant).

v2: Split REG_FIELD_PREP into a macro with checks and one without and use
the one without checks in i915_hwmon.c (Gwan-gyeong Mun)

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7354
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c|  2 +-
 drivers/gpu/drm/i915/i915_reg_defs.h | 17 +++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 9e97814930254..ae435b035229a 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -112,7 +112,7 @@ hwm_field_scale_and_write(struct hwm_drvdata *ddat, 
i915_reg_t rgadr,
nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
 
bits_to_clear = field_msk;
-   bits_to_set = FIELD_PREP(field_msk, nval);
+   bits_to_set = __REG_FIELD_PREP(field_msk, nval);
 
hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr,
bits_to_clear, bits_to_set);
diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h 
b/drivers/gpu/drm/i915/i915_reg_defs.h
index f1859046a9c48..dddacc8d48928 100644
--- a/drivers/gpu/drm/i915/i915_reg_defs.h
+++ b/drivers/gpu/drm/i915/i915_reg_defs.h
@@ -67,12 +67,17 @@
  *
  * @return: @__val masked and shifted into the field defined by @__mask.
  */
-#define REG_FIELD_PREP(__mask, __val)  
\
-   ((u32)typeof(__mask))(__val) << __bf_shf(__mask)) & (__mask)) + 
\
-  BUILD_BUG_ON_ZERO(!__is_constexpr(__mask)) + \
-  BUILD_BUG_ON_ZERO((__mask) == 0 || (__mask) > U32_MAX) + 
\
-  BUILD_BUG_ON_ZERO(!IS_POWER_OF_2((__mask) + (1ULL << 
__bf_shf(__mask + \
-  BUILD_BUG_ON_ZERO(__builtin_choose_expr(__is_constexpr(__val), 
(~((__mask) >> __bf_shf(__mask)) & (__val)), 0
+#define __REG_FIELD_PREP_CHK(__mask, __val) \
+   (BUILD_BUG_ON_ZERO(!__is_constexpr(__mask)) + \
+BUILD_BUG_ON_ZERO((__mask) == 0 || (__mask) > U32_MAX) + \
+BUILD_BUG_ON_ZERO(!IS_POWER_OF_2((__mask) + (1ULL << 
__bf_shf(__mask + \
+BUILD_BUG_ON_ZERO(__builtin_choose_expr(__is_constexpr(__val), 
(~((__mask) >> __bf_shf(__mask)) & (__val)), 0)))
+
+#define __REG_FIELD_PREP(__mask, __val) \
+   ((u32)typeof(__mask))(__val) << __bf_shf(__mask)) & (__mask
+
+#define REG_FIELD_PREP(__mask, __val) \
+   (__REG_FIELD_PREP(__mask, __val) + __REG_FIELD_PREP_CHK(__mask, __val))
 
 /**
  * REG_FIELD_GET() - Extract a u32 bitfield value
-- 
2.38.0



[PATCH] drm/i915/hwmon: Don't use FIELD_PREP

2022-10-30 Thread Ashutosh Dixit
FIELD_PREP and REG_FIELD_PREP have checks requiring a compile time constant
mask. When the mask comes in as the argument of a function these checks can
fail depending on the compiler (gcc vs clang), optimization level,
etc. Use a simpler local version of FIELD_PREP which skips these
checks. The checks are not needed because the mask is formed using
REG_GENMASK (so is actually a compile time constant).

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7354
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/i915_hwmon.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 9e97814930254..a3ec9a73a4e49 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -62,6 +62,12 @@ struct i915_hwmon {
int scl_shift_time;
 };
 
+/* FIELD_PREP and REG_FIELD_PREP require a compile time constant mask */
+static u32 hwm_field_prep(u32 mask, u32 val)
+{
+   return (val << __bf_shf(mask)) & mask;
+}
+
 static void
 hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
i915_reg_t reg, u32 clear, u32 set)
@@ -112,7 +118,7 @@ hwm_field_scale_and_write(struct hwm_drvdata *ddat, 
i915_reg_t rgadr,
nval = DIV_ROUND_CLOSEST_ULL((u64)lval << nshift, scale_factor);
 
bits_to_clear = field_msk;
-   bits_to_set = FIELD_PREP(field_msk, nval);
+   bits_to_set = hwm_field_prep(field_msk, nval);
 
hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr,
bits_to_clear, bits_to_set);
-- 
2.38.0



[PATCH 2/5] drm/i915: Use GEN12_RPSTAT register for GT freq

2022-10-24 Thread Ashutosh Dixit
From: Don Hiatt 

On GEN12+ use GEN12_RPSTAT register to get actual resolved GT
freq. GEN12_RPSTAT does not require a forcewake and will return 0 freq if
GT is in RC6.

v2:
  - Fixed review comments (Ashutosh)
  - Added function intel_rps_read_rpstat_fw to read RPSTAT without
forcewake, required especially for GEN6_RPSTAT1 (Ashutosh, Tvrtko)
v3:
  - Updated commit title and message for more clarity (Ashutosh)
  - Replaced intel_rps_read_rpstat with direct read to GEN12_RPSTAT1 in
read_cagf (Ashutosh)
v4: Remove GEN12_CAGF_SHIFT and use REG_FIELD_GET (Rodrigo)

Cc: Don Hiatt 
Cc: Andi Shyti 
Signed-off-by: Don Hiatt 
Signed-off-by: Badal Nilawar 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Andi Shyti 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_rps.c | 32 +
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 ++
 drivers/gpu/drm/i915/i915_pmu.c |  3 +--
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 35c039573294c..f8c4f758ac0b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1539,6 +1539,7 @@
 
 #define GEN12_RPSTAT1  _MMIO(0x1381b4)
 #define   GEN12_VOLTAGE_MASK   REG_GENMASK(10, 0)
+#define   GEN12_CAGF_MASK  REG_GENMASK(19, 11)
 
 #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4))
 #define   GEN11_CSME   (31)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 5061b5b3cece7..02f69cbae5162 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2066,12 +2066,34 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
 }
 
+u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   i915_reg_t rpstat;
+
+   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
+}
+
+u32 intel_rps_read_rpstat(struct intel_rps *rps)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   i915_reg_t rpstat;
+
+   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+   return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
+}
+
 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER(i915) >= 12)
+   cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = REG_FIELD_GET(RPE_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 9)
cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
@@ -2091,7 +2113,9 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+   if (GRAPHICS_VER(i915) >= 12) {
+   freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+   } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
@@ -2257,7 +2281,7 @@ static void rps_frequency_dump(struct intel_rps *rps, 
struct drm_printer *p)
rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
 
-   rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+   rpstat = intel_rps_read_rpstat(rps);
rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & 
GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & 
GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & 
GEN6_CURBSYTAVG_MASK;
@@ -2392,7 +2416,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, 
struct drm_printer *p)
drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
   rps->pm_intrmsk_mbz);
-   drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, 
GEN6_RPSTAT1));
+   drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
drm_printf(p, "RPNSWREQ: %dMHz\n", 
intel_rps_get_requested_frequency(rps));
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
   intel_gpu_freq(rps, caps.min_freq));
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index 110300dfd4383..9e1cad9ba0e9c 100644
--- a/dri

[PATCH 3/5] drm/i915/mtl: Modify CAGF functions for MTL

2022-10-24 Thread Ashutosh Dixit
From: Badal Nilawar 

Update CAGF functions for MTL to get actual resolved frequency of 3D and
SAMedia.

v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR)
Move MTL branches in cagf functions to top (MattR)
Fix commit message (Andi)
v3: Added comment about registers not needing forcewake for Gen12+ and
returning 0 freq in RC6
v4: Use REG_FIELD_GET and uncore (Rodrigo)

Bspec: 66300

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
Reviewed-by: Ashutosh Dixit 
Acked-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  4 
 drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index f8c4f758ac0b1..d8dbd0ac3b064 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -21,6 +21,10 @@
  */
 #define PERF_REG(offset)   _MMIO(offset)
 
+/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
+#define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
+#define   MTL_CAGF_MASKREG_GENMASK(8, 0)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0_MMIO(0xd00)
 #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 02f69cbae5162..eb3343a217947 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2091,7 +2091,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (GRAPHICS_VER(i915) >= 12)
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat);
+   else if (GRAPHICS_VER(i915) >= 12)
cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = REG_FIELD_GET(RPE_MASK, rpstat);
@@ -2113,7 +2115,13 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (GRAPHICS_VER(i915) >= 12) {
+   /*
+* For Gen12+ reading freq from HW does not need a forcewake and
+* registers will return 0 freq when GT is in RC6
+*/
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+   freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+   } else if (GRAPHICS_VER(i915) >= 12) {
freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
-- 
2.38.0



[PATCH 4/5] drm/i915/gt: Use RC6 residency types as arguments to residency functions

2022-10-24 Thread Ashutosh Dixit
Previously RC6 residency functions directly accepted RC6 residency register
MMIO offsets (there are four RC6 residency registers). This worked but
required an assumption about the residency register layout, so it was not
future-proof.

Therefore change RC6 residency functions to accept RC6 residency types
instead of register MMIO offsets. The knowledge of register offsets as well
as ID to offset mapping is now maintained solely in intel_rc6 and can be
tailored for different platforms and different register layouts as need
arises.

v2: Address review comments by Jani N
- Change residency functions to accept RC6 residency types instead of
  register ID's
- s/intel_rc6_print_rc5_res/intel_rc6_print_residency/
- Remove "const enum" in function arguments
- Naming: intel_rc6_* for enum
- Use INTEL_RC6_RES_MAX and other minor changes
v3: Don't include intel_rc6_types.h in intel_rc6.h (Jani)

Suggested-by: Rodrigo Vivi 
Suggested-by: Jani Nikula 
Reported-by: Jani Nikula 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 ++--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 55 +++
 drivers/gpu/drm/i915/gt/intel_rc6.h   | 11 ++--
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 -
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  6 +-
 7 files changed, 72 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 979e602946549..5d6b346831393 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data)
 }
 DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
 
-static void print_rc6_res(struct seq_file *m,
- const char *title,
- const i915_reg_t reg)
-{
-   struct intel_gt *gt = m->private;
-   intel_wakeref_t wakeref;
-
-   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   seq_printf(m, "%s %u (%llu us)\n", title,
-  intel_uncore_read(gt->uncore, reg),
-  intel_rc6_residency_us(>rc6, reg));
-}
-
 static int vlv_drpc(struct seq_file *m)
 {
struct intel_gt *gt = m->private;
@@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m)
seq_printf(m, "Media Power Well: %s\n",
   (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
 
-   print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+   intel_rc6_print_residency(m, "Render RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+   intel_rc6_print_residency(m, "Media RC6 residency since boot:", 
INTEL_RC6_RES_VLV_MEDIA);
 
return fw_domains_show(m, NULL);
 }
@@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m)
}
 
/* Not exactly sure what this is */
-   print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
- GEN6_GT_GFX_RC6_LOCKED);
-   print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
-   print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+   intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since 
boot:",
+ INTEL_RC6_RES_RC6_LOCKED);
+   intel_rc6_print_residency(m, "RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+   intel_rc6_print_residency(m, "RC6+ residency since boot:", 
INTEL_RC6_RES_RC6p);
+   intel_rc6_print_residency(m, "RC6++ residency since boot:", 
INTEL_RC6_RES_RC6pp);
 
if (GRAPHICS_VER(i915) <= 7) {
seq_printf(m, "RC6   voltage: %dmV\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 9041609523697..19a6e052c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct 
device_attribute *attr,
sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
 
 #ifdef CONFIG_PM
-static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
+static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id)
 {
intel_wakeref_t wakeref;
u64 res = 0;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   res = intel_rc6_residency_us(>rc

[PATCH 5/5] drm/i915/mtl: C6 residency and C state type for MTL SAMedia

2022-10-24 Thread Ashutosh Dixit
From: Badal Nilawar 

Add support for C6 residency and C state type for MTL SAMedia. Also add
mtl_drpc.

v2: Fixed review comments (Ashutosh)
v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R)
Remove MTL_CC_SHIFT (Ashutosh)
Adapt to RC6 residency register code refactor (Jani N)
v4: Move MTL branch to top in drpc_show
v5: Use FORCEWAKE_MT identical to gen6_drpc (Ashutosh)
v6: Add MISSING_CASE for gt_core_status switch statement (Rodrigo)
Change state name for MTL_CC0 to C0 (from "on") (Rodrigo)
v7: Change state name for MTL_CC0 to RC0 (Rodrigo)

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 59 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  5 ++
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 17 --
 3 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 5d6b346831393..83df4cd5e06cb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -256,6 +256,61 @@ static int ilk_drpc(struct seq_file *m)
return 0;
 }
 
+static int mtl_drpc(struct seq_file *m)
+{
+   struct intel_gt *gt = m->private;
+   struct intel_uncore *uncore = gt->uncore;
+   u32 gt_core_status, rcctl1, mt_fwake_req;
+   u32 mtl_powergate_enable = 0, mtl_powergate_status = 0;
+
+   mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
+   gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+
+   rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+   mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
+   mtl_powergate_status = intel_uncore_read(uncore,
+GEN9_PWRGT_DOMAIN_STATUS);
+
+   seq_printf(m, "RC6 Enabled: %s\n",
+  str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+   if (gt->type == GT_MEDIA) {
+   seq_printf(m, "Media Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_MEDIA_PG_ENABLE));
+   } else {
+   seq_printf(m, "Render Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_RENDER_PG_ENABLE));
+   }
+
+   seq_puts(m, "Current RC state: ");
+   switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) {
+   case MTL_CC0:
+   seq_puts(m, "RC0\n");
+   break;
+   case MTL_CC6:
+   seq_puts(m, "RC6\n");
+   break;
+   default:
+   MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status));
+   seq_puts(m, "Unknown\n");
+   break;
+   }
+
+   seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req);
+   if (gt->type == GT_MEDIA)
+   seq_printf(m, "Media Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+   else
+   seq_printf(m, "Render Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+
+   /* Works for both render and media gt's */
+   intel_rc6_print_residency(m, "RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+
+   return fw_domains_show(m, NULL);
+}
+
 static int drpc_show(struct seq_file *m, void *unused)
 {
struct intel_gt *gt = m->private;
@@ -264,7 +319,9 @@ static int drpc_show(struct seq_file *m, void *unused)
int err = -ENODEV;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   err = mtl_drpc(m);
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
err = vlv_drpc(m);
else if (GRAPHICS_VER(i915) >= 6)
err = gen6_drpc(m);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index d8dbd0ac3b064..a0ddaf243593c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -24,6 +24,9 @@
 /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
 #define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
 #define   MTL_CAGF_MASKREG_GENMASK(8, 0)
+#define   MTL_CC0  0x0
+#define   MTL_CC6  0x3
+#define   MTL_CC_MASK  

[PATCH 1/5] drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf

2022-10-24 Thread Ashutosh Dixit
Instead of masks/shifts settle on REG_FIELD_GET as the standard way to
extract reg fields. This allows future patches touching this code to also
consistently use REG_FIELD_GET and friends.

Suggested-by: Rodrigo Vivi 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 12 
 drivers/gpu/drm/i915/gt/intel_rps.c   | 11 +--
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 40d0a3be42acf..979e602946549 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -307,7 +307,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct 
drm_printer *p)
drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) 
>>
   MEMSTAT_VID_SHIFT);
drm_printf(p, "Current P-state: %d\n",
-  (rgvstat & MEMSTAT_PSTATE_MASK) >> 
MEMSTAT_PSTATE_SHIFT);
+  REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rgvstat));
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
u32 rpmodectl, freq_sts;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 36d95b79022c0..35c039573294c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -794,12 +794,9 @@
 #define GEN6_RP_DOWN_TIMEOUT   _MMIO(0xa010)
 #define GEN6_RP_INTERRUPT_LIMITS   _MMIO(0xa014)
 #define GEN6_RPSTAT1   _MMIO(0xa01c)
-#define   GEN6_CAGF_SHIFT  8
-#define   HSW_CAGF_SHIFT   7
-#define   GEN9_CAGF_SHIFT  23
-#define   GEN6_CAGF_MASK   (0x7f << GEN6_CAGF_SHIFT)
-#define   HSW_CAGF_MASK(0x7f << HSW_CAGF_SHIFT)
-#define   GEN9_CAGF_MASK   (0x1ff << GEN9_CAGF_SHIFT)
+#define   GEN6_CAGF_MASK   REG_GENMASK(14, 8)
+#define   HSW_CAGF_MASKREG_GENMASK(13, 7)
+#define   GEN9_CAGF_MASK   REG_GENMASK(31, 23)
 #define GEN6_RP_CONTROL_MMIO(0xa024)
 #define   GEN6_RP_MEDIA_TURBO  (1 << 11)
 #define   GEN6_RP_MEDIA_MODE_MASK  (3 << 9)
@@ -1370,8 +1367,7 @@
 #define MEMSTAT_ILK_MMIO(0x111f8)
 #define   MEMSTAT_VID_MASK 0x7f00
 #define   MEMSTAT_VID_SHIFT8
-#define   MEMSTAT_PSTATE_MASK  0x00f8
-#define   MEMSTAT_PSTATE_SHIFT 3
+#define   MEMSTAT_PSTATE_MASK  REG_GENMASK(7, 3)
 #define   MEMSTAT_MON_ACTV (1 << 2)
 #define   MEMSTAT_SRC_CTL_MASK 0x0003
 #define   MEMSTAT_SRC_CTL_CORE 0
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 070005dd0da47..5061b5b3cece7 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2072,16 +2072,15 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 
rpstat)
u32 cagf;
 
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
-   cagf = (rpstat >> 8) & 0xff;
+   cagf = REG_FIELD_GET(RPE_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 9)
-   cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-   cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 6)
-   cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat);
else
-   cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
-   MEMSTAT_PSTATE_SHIFT);
+   cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, 
rpstat));
 
return cagf;
 }
-- 
2.38.0



[PATCH 0/5] i915: CAGF and RC6 changes for MTL

2022-10-24 Thread Ashutosh Dixit
This series includes the code changes to get CAGF, RC State and C6
Residency of MTL.

v3: Included "Use GEN12 RPSTAT register" patch

v4:
  - Rebased
  - Dropped "Use GEN12 RPSTAT register" patch from this series;
going to send a separate series for it

v5:
  - Included "drm/i915/gt: Change RC6 residency functions to accept register
ID's" based on code review feedback

v6:
  - Addressed Jani N's review comments on "drm/i915/gt: Change RC6 residency
functions to accept register ID's"
  - Re-add "drm/i915: Use GEN12_RPSTAT register for GT freq" to this series

v7: Rebuild, identical to v6

v8:
  - Add "drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf" to the series
(based on Rodrigo's review) to consistently use REG_FIELD_GET
  - Minor changes to other patches, please see individual patches for changelogs

v9: Rebuild, identical to v8

v10: Address review comments from Rodrigo on Patch 5

v11: Change state name for MTL_CC0 to RC0 in Patch 5

Ashutosh Dixit (2):
  drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf
  drm/i915/gt: Use RC6 residency types as arguments to residency
functions

Badal Nilawar (2):
  drm/i915/mtl: Modify CAGF functions for MTL
  drm/i915/mtl: C6 residency and C state type for MTL SAMedia

Don Hiatt (1):
  drm/i915: Use GEN12_RPSTAT register for GT freq

 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 88 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 22 +++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 +--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 64 +-
 drivers/gpu/drm/i915/gt/intel_rc6.h   | 11 ++-
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 +++-
 drivers/gpu/drm/i915/gt/intel_rps.c   | 51 ---
 drivers/gpu/drm/i915/gt/intel_rps.h   |  2 +
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  9 +-
 10 files changed, 198 insertions(+), 82 deletions(-)

-- 
2.38.0



[PATCH 3/5] drm/i915/mtl: Modify CAGF functions for MTL

2022-10-24 Thread Ashutosh Dixit
From: Badal Nilawar 

Update CAGF functions for MTL to get actual resolved frequency of 3D and
SAMedia.

v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR)
Move MTL branches in cagf functions to top (MattR)
Fix commit message (Andi)
v3: Added comment about registers not needing forcewake for Gen12+ and
returning 0 freq in RC6
v4: Use REG_FIELD_GET and uncore (Rodrigo)

Bspec: 66300

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
Reviewed-by: Ashutosh Dixit 
Acked-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  4 
 drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index f8c4f758ac0b1..d8dbd0ac3b064 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -21,6 +21,10 @@
  */
 #define PERF_REG(offset)   _MMIO(offset)
 
+/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
+#define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
+#define   MTL_CAGF_MASKREG_GENMASK(8, 0)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0_MMIO(0xd00)
 #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 02f69cbae5162..eb3343a217947 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2091,7 +2091,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (GRAPHICS_VER(i915) >= 12)
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat);
+   else if (GRAPHICS_VER(i915) >= 12)
cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = REG_FIELD_GET(RPE_MASK, rpstat);
@@ -2113,7 +2115,13 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (GRAPHICS_VER(i915) >= 12) {
+   /*
+* For Gen12+ reading freq from HW does not need a forcewake and
+* registers will return 0 freq when GT is in RC6
+*/
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+   freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+   } else if (GRAPHICS_VER(i915) >= 12) {
freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
-- 
2.38.0



[PATCH 2/5] drm/i915: Use GEN12_RPSTAT register for GT freq

2022-10-24 Thread Ashutosh Dixit
From: Don Hiatt 

On GEN12+ use GEN12_RPSTAT register to get actual resolved GT
freq. GEN12_RPSTAT does not require a forcewake and will return 0 freq if
GT is in RC6.

v2:
  - Fixed review comments (Ashutosh)
  - Added function intel_rps_read_rpstat_fw to read RPSTAT without
forcewake, required especially for GEN6_RPSTAT1 (Ashutosh, Tvrtko)
v3:
  - Updated commit title and message for more clarity (Ashutosh)
  - Replaced intel_rps_read_rpstat with direct read to GEN12_RPSTAT1 in
read_cagf (Ashutosh)
v4: Remove GEN12_CAGF_SHIFT and use REG_FIELD_GET (Rodrigo)

Cc: Don Hiatt 
Cc: Andi Shyti 
Signed-off-by: Don Hiatt 
Signed-off-by: Badal Nilawar 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Andi Shyti 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_rps.c | 32 +
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 ++
 drivers/gpu/drm/i915/i915_pmu.c |  3 +--
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 35c039573294c..f8c4f758ac0b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1539,6 +1539,7 @@
 
 #define GEN12_RPSTAT1  _MMIO(0x1381b4)
 #define   GEN12_VOLTAGE_MASK   REG_GENMASK(10, 0)
+#define   GEN12_CAGF_MASK  REG_GENMASK(19, 11)
 
 #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4))
 #define   GEN11_CSME   (31)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 5061b5b3cece7..02f69cbae5162 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2066,12 +2066,34 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
 }
 
+u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   i915_reg_t rpstat;
+
+   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
+}
+
+u32 intel_rps_read_rpstat(struct intel_rps *rps)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   i915_reg_t rpstat;
+
+   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+   return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
+}
+
 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER(i915) >= 12)
+   cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = REG_FIELD_GET(RPE_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 9)
cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
@@ -2091,7 +2113,9 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+   if (GRAPHICS_VER(i915) >= 12) {
+   freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+   } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
@@ -2257,7 +2281,7 @@ static void rps_frequency_dump(struct intel_rps *rps, 
struct drm_printer *p)
rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
 
-   rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+   rpstat = intel_rps_read_rpstat(rps);
rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & 
GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & 
GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & 
GEN6_CURBSYTAVG_MASK;
@@ -2392,7 +2416,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, 
struct drm_printer *p)
drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
   rps->pm_intrmsk_mbz);
-   drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, 
GEN6_RPSTAT1));
+   drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
drm_printf(p, "RPNSWREQ: %dMHz\n", 
intel_rps_get_requested_frequency(rps));
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
   intel_gpu_freq(rps, caps.min_freq));
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index 110300dfd4383..9e1cad9ba0e9c 100644
--- a/dri

[PATCH 5/5] drm/i915/mtl: C6 residency and C state type for MTL SAMedia

2022-10-24 Thread Ashutosh Dixit
From: Badal Nilawar 

Add support for C6 residency and C state type for MTL SAMedia. Also add
mtl_drpc.

v2: Fixed review comments (Ashutosh)
v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R)
Remove MTL_CC_SHIFT (Ashutosh)
Adapt to RC6 residency register code refactor (Jani N)
v4: Move MTL branch to top in drpc_show
v5: Use FORCEWAKE_MT identical to gen6_drpc (Ashutosh)
v6: Add MISSING_CASE for gt_core_status switch statement (Rodrigo)
Change state name for MTL_CC0 to C0 (from "on") (Rodrigo)

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 59 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  5 ++
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 17 --
 3 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 5d6b346831393..522049f053e8a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -256,6 +256,61 @@ static int ilk_drpc(struct seq_file *m)
return 0;
 }
 
+static int mtl_drpc(struct seq_file *m)
+{
+   struct intel_gt *gt = m->private;
+   struct intel_uncore *uncore = gt->uncore;
+   u32 gt_core_status, rcctl1, mt_fwake_req;
+   u32 mtl_powergate_enable = 0, mtl_powergate_status = 0;
+
+   mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
+   gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+
+   rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+   mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
+   mtl_powergate_status = intel_uncore_read(uncore,
+GEN9_PWRGT_DOMAIN_STATUS);
+
+   seq_printf(m, "RC6 Enabled: %s\n",
+  str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+   if (gt->type == GT_MEDIA) {
+   seq_printf(m, "Media Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_MEDIA_PG_ENABLE));
+   } else {
+   seq_printf(m, "Render Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_RENDER_PG_ENABLE));
+   }
+
+   seq_puts(m, "Current RC state: ");
+   switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) {
+   case MTL_CC0:
+   seq_puts(m, "C0\n");
+   break;
+   case MTL_CC6:
+   seq_puts(m, "RC6\n");
+   break;
+   default:
+   MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status));
+   seq_puts(m, "Unknown\n");
+   break;
+   }
+
+   seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req);
+   if (gt->type == GT_MEDIA)
+   seq_printf(m, "Media Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+   else
+   seq_printf(m, "Render Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+
+   /* Works for both render and media gt's */
+   intel_rc6_print_residency(m, "RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+
+   return fw_domains_show(m, NULL);
+}
+
 static int drpc_show(struct seq_file *m, void *unused)
 {
struct intel_gt *gt = m->private;
@@ -264,7 +319,9 @@ static int drpc_show(struct seq_file *m, void *unused)
int err = -ENODEV;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   err = mtl_drpc(m);
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
err = vlv_drpc(m);
else if (GRAPHICS_VER(i915) >= 6)
err = gen6_drpc(m);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index d8dbd0ac3b064..a0ddaf243593c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -24,6 +24,9 @@
 /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
 #define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
 #define   MTL_CAGF_MASK   REG_GENMASK(8, 0)
+#define   MTL_CC0  0x0
+#define   MTL_CC6  0x3
+#define   MTL_CC_MASK  REG_GENMASK(12, 9)
 
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0

[PATCH 4/5] drm/i915/gt: Use RC6 residency types as arguments to residency functions

2022-10-24 Thread Ashutosh Dixit
Previously, RC6 residency functions directly accepted RC6 residency register
MMIO offsets (there are four RC6 residency registers). This worked but
relied on an assumption about the residency register layout, so it was not
future-proof.

Therefore, change the RC6 residency functions to accept RC6 residency types
instead of register MMIO offsets. Knowledge of the register offsets, as well
as the ID-to-offset mapping, is now maintained solely in intel_rc6 and can
be tailored for different platforms and different register layouts as the
need arises.

v2: Address review comments by Jani N
- Change residency functions to accept RC6 residency types instead of
  register ID's
- s/intel_rc6_print_rc5_res/intel_rc6_print_residency/
- Remove "const enum" in function arguments
- Naming: intel_rc6_* for enum
- Use INTEL_RC6_RES_MAX and other minor changes
v3: Don't include intel_rc6_types.h in intel_rc6.h (Jani)

Suggested-by: Rodrigo Vivi 
Suggested-by: Jani Nikula 
Reported-by: Jani Nikula 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 ++--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 55 +++
 drivers/gpu/drm/i915/gt/intel_rc6.h   | 11 ++--
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 -
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  6 +-
 7 files changed, 72 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 979e602946549..5d6b346831393 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data)
 }
 DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
 
-static void print_rc6_res(struct seq_file *m,
- const char *title,
- const i915_reg_t reg)
-{
-   struct intel_gt *gt = m->private;
-   intel_wakeref_t wakeref;
-
-   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   seq_printf(m, "%s %u (%llu us)\n", title,
-  intel_uncore_read(gt->uncore, reg),
   intel_rc6_residency_us(&gt->rc6, reg));
-}
-
 static int vlv_drpc(struct seq_file *m)
 {
struct intel_gt *gt = m->private;
@@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m)
seq_printf(m, "Media Power Well: %s\n",
   (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
 
-   print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+   intel_rc6_print_residency(m, "Render RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+   intel_rc6_print_residency(m, "Media RC6 residency since boot:", 
INTEL_RC6_RES_VLV_MEDIA);
 
return fw_domains_show(m, NULL);
 }
@@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m)
}
 
/* Not exactly sure what this is */
-   print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
- GEN6_GT_GFX_RC6_LOCKED);
-   print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
-   print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+   intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since 
boot:",
+ INTEL_RC6_RES_RC6_LOCKED);
+   intel_rc6_print_residency(m, "RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+   intel_rc6_print_residency(m, "RC6+ residency since boot:", 
INTEL_RC6_RES_RC6p);
+   intel_rc6_print_residency(m, "RC6++ residency since boot:", 
INTEL_RC6_RES_RC6pp);
 
if (GRAPHICS_VER(i915) <= 7) {
seq_printf(m, "RC6   voltage: %dmV\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 9041609523697..19a6e052c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct 
device_attribute *attr,
sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
 
 #ifdef CONFIG_PM
-static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
+static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id)
 {
intel_wakeref_t wakeref;
u64 res = 0;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   res = intel_rc6_residency_us(>rc

[PATCH 0/5] i915: CAGF and RC6 changes for MTL

2022-10-24 Thread Ashutosh Dixit
This series includes the code changes to get CAGF, RC State and C6
Residency of MTL.

v3: Included "Use GEN12 RPSTAT register" patch

v4:
  - Rebased
  - Dropped "Use GEN12 RPSTAT register" patch from this series;
    a separate series will be sent for it

v5:
  - Included "drm/i915/gt: Change RC6 residency functions to accept register
ID's" based on code review feedback

v6:
  - Addressed Jani N's review comments on "drm/i915/gt: Change RC6 residency
functions to accept register ID's"
  - Re-add "drm/i915: Use GEN12_RPSTAT register for GT freq" to this series

v7: Rebuild, identical to v6

v8:
  - Add "drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf" to the series
(based on Rodrigo's review) to consistently use REG_FIELD_GET
  - Minor changes to other patches, please see individual patches for changelogs

v9: Rebuild, identical to v8

v10: Address review comments from Rodrigo on Patch 5

Ashutosh Dixit (2):
  drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf
  drm/i915/gt: Use RC6 residency types as arguments to residency
functions

Badal Nilawar (2):
  drm/i915/mtl: Modify CAGF functions for MTL
  drm/i915/mtl: C6 residency and C state type for MTL SAMedia

Don Hiatt (1):
  drm/i915: Use GEN12_RPSTAT register for GT freq

 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 88 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 22 +++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 +--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 64 +-
 drivers/gpu/drm/i915/gt/intel_rc6.h   | 11 ++-
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 +++-
 drivers/gpu/drm/i915/gt/intel_rps.c   | 51 ---
 drivers/gpu/drm/i915/gt/intel_rps.h   |  2 +
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  9 +-
 10 files changed, 198 insertions(+), 82 deletions(-)

-- 
2.38.0



[PATCH 1/5] drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf

2022-10-24 Thread Ashutosh Dixit
Instead of open-coded masks/shifts, settle on REG_FIELD_GET as the standard
way to extract register fields. This allows future patches touching this
code to also consistently use REG_FIELD_GET and friends.

Suggested-by: Rodrigo Vivi 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 12 
 drivers/gpu/drm/i915/gt/intel_rps.c   | 11 +--
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 40d0a3be42acf..979e602946549 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -307,7 +307,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct 
drm_printer *p)
drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) 
>>
   MEMSTAT_VID_SHIFT);
drm_printf(p, "Current P-state: %d\n",
-  (rgvstat & MEMSTAT_PSTATE_MASK) >> 
MEMSTAT_PSTATE_SHIFT);
+  REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rgvstat));
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
u32 rpmodectl, freq_sts;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 36d95b79022c0..35c039573294c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -794,12 +794,9 @@
 #define GEN6_RP_DOWN_TIMEOUT   _MMIO(0xa010)
 #define GEN6_RP_INTERRUPT_LIMITS   _MMIO(0xa014)
 #define GEN6_RPSTAT1   _MMIO(0xa01c)
-#define   GEN6_CAGF_SHIFT  8
-#define   HSW_CAGF_SHIFT   7
-#define   GEN9_CAGF_SHIFT  23
-#define   GEN6_CAGF_MASK   (0x7f << GEN6_CAGF_SHIFT)
-#define   HSW_CAGF_MASK   (0x7f << HSW_CAGF_SHIFT)
-#define   GEN9_CAGF_MASK   (0x1ff << GEN9_CAGF_SHIFT)
+#define   GEN6_CAGF_MASK   REG_GENMASK(14, 8)
+#define   HSW_CAGF_MASK   REG_GENMASK(13, 7)
+#define   GEN9_CAGF_MASK   REG_GENMASK(31, 23)
 #define GEN6_RP_CONTROL   _MMIO(0xa024)
 #define   GEN6_RP_MEDIA_TURBO  (1 << 11)
 #define   GEN6_RP_MEDIA_MODE_MASK  (3 << 9)
@@ -1370,8 +1367,7 @@
 #define MEMSTAT_ILK   _MMIO(0x111f8)
 #define   MEMSTAT_VID_MASK 0x7f00
 #define   MEMSTAT_VID_SHIFT   8
-#define   MEMSTAT_PSTATE_MASK  0x00f8
-#define   MEMSTAT_PSTATE_SHIFT 3
+#define   MEMSTAT_PSTATE_MASK  REG_GENMASK(7, 3)
 #define   MEMSTAT_MON_ACTV (1 << 2)
 #define   MEMSTAT_SRC_CTL_MASK 0x0003
 #define   MEMSTAT_SRC_CTL_CORE 0
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 070005dd0da47..5061b5b3cece7 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2072,16 +2072,15 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 
rpstat)
u32 cagf;
 
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
-   cagf = (rpstat >> 8) & 0xff;
+   cagf = REG_FIELD_GET(RPE_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 9)
-   cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-   cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 6)
-   cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat);
else
-   cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
-   MEMSTAT_PSTATE_SHIFT);
+   cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, 
rpstat));
 
return cagf;
 }
-- 
2.38.0



[PATCH 1/5] drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf

2022-10-19 Thread Ashutosh Dixit
Instead of open-coded masks/shifts, settle on REG_FIELD_GET as the standard
way to extract register fields. This allows future patches touching this
code to also consistently use REG_FIELD_GET and friends.

Suggested-by: Rodrigo Vivi 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 12 
 drivers/gpu/drm/i915/gt/intel_rps.c   | 11 +--
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 40d0a3be42acf..979e602946549 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -307,7 +307,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct 
drm_printer *p)
drm_printf(p, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) 
>>
   MEMSTAT_VID_SHIFT);
drm_printf(p, "Current P-state: %d\n",
-  (rgvstat & MEMSTAT_PSTATE_MASK) >> 
MEMSTAT_PSTATE_SHIFT);
+  REG_FIELD_GET(MEMSTAT_PSTATE_MASK, rgvstat));
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
u32 rpmodectl, freq_sts;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 36d95b79022c0..35c039573294c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -794,12 +794,9 @@
 #define GEN6_RP_DOWN_TIMEOUT   _MMIO(0xa010)
 #define GEN6_RP_INTERRUPT_LIMITS   _MMIO(0xa014)
 #define GEN6_RPSTAT1   _MMIO(0xa01c)
-#define   GEN6_CAGF_SHIFT  8
-#define   HSW_CAGF_SHIFT   7
-#define   GEN9_CAGF_SHIFT  23
-#define   GEN6_CAGF_MASK   (0x7f << GEN6_CAGF_SHIFT)
-#define   HSW_CAGF_MASK   (0x7f << HSW_CAGF_SHIFT)
-#define   GEN9_CAGF_MASK   (0x1ff << GEN9_CAGF_SHIFT)
+#define   GEN6_CAGF_MASK   REG_GENMASK(14, 8)
+#define   HSW_CAGF_MASK   REG_GENMASK(13, 7)
+#define   GEN9_CAGF_MASK   REG_GENMASK(31, 23)
 #define GEN6_RP_CONTROL   _MMIO(0xa024)
 #define   GEN6_RP_MEDIA_TURBO  (1 << 11)
 #define   GEN6_RP_MEDIA_MODE_MASK  (3 << 9)
@@ -1370,8 +1367,7 @@
 #define MEMSTAT_ILK   _MMIO(0x111f8)
 #define   MEMSTAT_VID_MASK 0x7f00
 #define   MEMSTAT_VID_SHIFT   8
-#define   MEMSTAT_PSTATE_MASK  0x00f8
-#define   MEMSTAT_PSTATE_SHIFT 3
+#define   MEMSTAT_PSTATE_MASK  REG_GENMASK(7, 3)
 #define   MEMSTAT_MON_ACTV (1 << 2)
 #define   MEMSTAT_SRC_CTL_MASK 0x0003
 #define   MEMSTAT_SRC_CTL_CORE 0
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2a..5bd6671554a6e 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2074,16 +2074,15 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 
rpstat)
u32 cagf;
 
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
-   cagf = (rpstat >> 8) & 0xff;
+   cagf = REG_FIELD_GET(RPE_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 9)
-   cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
-   cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(HSW_CAGF_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 6)
-   cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+   cagf = REG_FIELD_GET(GEN6_CAGF_MASK, rpstat);
else
-   cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
-   MEMSTAT_PSTATE_SHIFT);
+   cagf = gen5_invert_freq(rps, REG_FIELD_GET(MEMSTAT_PSTATE_MASK, 
rpstat));
 
return cagf;
 }
-- 
2.38.0



[PATCH 5/5] drm/i915/mtl: C6 residency and C state type for MTL SAMedia

2022-10-19 Thread Ashutosh Dixit
From: Badal Nilawar 

Add support for C6 residency and C state type for MTL SAMedia. Also add
mtl_drpc.

v2: Fixed review comments (Ashutosh)
v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R)
Remove MTL_CC_SHIFT (Ashutosh)
Adapt to RC6 residency register code refactor (Jani N)
v4: Move MTL branch to top in drpc_show
v5: Use FORCEWAKE_MT identical to gen6_drpc (Ashutosh)

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 58 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  5 ++
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 17 --
 3 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 5d6b346831393..f15a7486a9866 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -256,6 +256,60 @@ static int ilk_drpc(struct seq_file *m)
return 0;
 }
 
+static int mtl_drpc(struct seq_file *m)
+{
+   struct intel_gt *gt = m->private;
+   struct intel_uncore *uncore = gt->uncore;
+   u32 gt_core_status, rcctl1, mt_fwake_req;
+   u32 mtl_powergate_enable = 0, mtl_powergate_status = 0;
+
+   mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
+   gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+
+   rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+   mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
+   mtl_powergate_status = intel_uncore_read(uncore,
+GEN9_PWRGT_DOMAIN_STATUS);
+
+   seq_printf(m, "RC6 Enabled: %s\n",
+  str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+   if (gt->type == GT_MEDIA) {
+   seq_printf(m, "Media Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_MEDIA_PG_ENABLE));
+   } else {
+   seq_printf(m, "Render Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_RENDER_PG_ENABLE));
+   }
+
+   seq_puts(m, "Current RC state: ");
+   switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) {
+   case MTL_CC0:
+   seq_puts(m, "on\n");
+   break;
+   case MTL_CC6:
+   seq_puts(m, "RC6\n");
+   break;
+   default:
+   seq_puts(m, "Unknown\n");
+   break;
+   }
+
+   seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req);
+   if (gt->type == GT_MEDIA)
+   seq_printf(m, "Media Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+   else
+   seq_printf(m, "Render Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+
+   /* Works for both render and media gt's */
+   intel_rc6_print_residency(m, "RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+
+   return fw_domains_show(m, NULL);
+}
+
 static int drpc_show(struct seq_file *m, void *unused)
 {
struct intel_gt *gt = m->private;
@@ -264,7 +318,9 @@ static int drpc_show(struct seq_file *m, void *unused)
int err = -ENODEV;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   err = mtl_drpc(m);
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
err = vlv_drpc(m);
else if (GRAPHICS_VER(i915) >= 6)
err = gen6_drpc(m);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index d8dbd0ac3b064..a0ddaf243593c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -24,6 +24,9 @@
 /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
 #define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
 #define   MTL_CAGF_MASK   REG_GENMASK(8, 0)
+#define   MTL_CC0  0x0
+#define   MTL_CC6  0x3
+#define   MTL_CC_MASK  REG_GENMASK(12, 9)
 
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0   _MMIO(0xd00)
@@ -1512,6 +1515,8 @@
 #define FORCEWAKE_MEDIA_VLV   _MMIO(0x1300b8)
 #define FORCEWAKE_ACK_MEDIA_VLV   _MMIO(0x1300bc)
 
+#define MTL_MEDIA_MC6

[PATCH 4/5] drm/i915/gt: Use RC6 residency types as arguments to residency functions

2022-10-19 Thread Ashutosh Dixit
Previously, RC6 residency functions directly accepted RC6 residency register
MMIO offsets (there are four RC6 residency registers). This worked but
relied on an assumption about the residency register layout, so it was not
future-proof.

Therefore, change the RC6 residency functions to accept RC6 residency types
instead of register MMIO offsets. Knowledge of the register offsets, as well
as the ID-to-offset mapping, is now maintained solely in intel_rc6 and can
be tailored for different platforms and different register layouts as the
need arises.

v2: Address review comments by Jani N
- Change residency functions to accept RC6 residency types instead of
  register ID's
- s/intel_rc6_print_rc5_res/intel_rc6_print_residency/
- Remove "const enum" in function arguments
- Naming: intel_rc6_* for enum
- Use INTEL_RC6_RES_MAX and other minor changes
v3: Don't include intel_rc6_types.h in intel_rc6.h (Jani)

Suggested-by: Rodrigo Vivi 
Suggested-by: Jani Nikula 
Reported-by: Jani Nikula 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 ++--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 55 +++
 drivers/gpu/drm/i915/gt/intel_rc6.h   | 11 ++--
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 -
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  6 +-
 7 files changed, 72 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 979e602946549..5d6b346831393 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data)
 }
 DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
 
-static void print_rc6_res(struct seq_file *m,
- const char *title,
- const i915_reg_t reg)
-{
-   struct intel_gt *gt = m->private;
-   intel_wakeref_t wakeref;
-
-   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   seq_printf(m, "%s %u (%llu us)\n", title,
-  intel_uncore_read(gt->uncore, reg),
   intel_rc6_residency_us(&gt->rc6, reg));
-}
-
 static int vlv_drpc(struct seq_file *m)
 {
struct intel_gt *gt = m->private;
@@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m)
seq_printf(m, "Media Power Well: %s\n",
   (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
 
-   print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+   intel_rc6_print_residency(m, "Render RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+   intel_rc6_print_residency(m, "Media RC6 residency since boot:", 
INTEL_RC6_RES_VLV_MEDIA);
 
return fw_domains_show(m, NULL);
 }
@@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m)
}
 
/* Not exactly sure what this is */
-   print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
- GEN6_GT_GFX_RC6_LOCKED);
-   print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
-   print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+   intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since 
boot:",
+ INTEL_RC6_RES_RC6_LOCKED);
+   intel_rc6_print_residency(m, "RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+   intel_rc6_print_residency(m, "RC6+ residency since boot:", 
INTEL_RC6_RES_RC6p);
+   intel_rc6_print_residency(m, "RC6++ residency since boot:", 
INTEL_RC6_RES_RC6pp);
 
if (GRAPHICS_VER(i915) <= 7) {
seq_printf(m, "RC6   voltage: %dmV\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 9041609523697..19a6e052c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct 
device_attribute *attr,
sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
 
 #ifdef CONFIG_PM
-static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
+static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id)
 {
intel_wakeref_t wakeref;
u64 res = 0;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   res = intel_rc6_residency_us(&gt->rc6, reg);
+   res = i

[PATCH 2/5] drm/i915: Use GEN12_RPSTAT register for GT freq

2022-10-19 Thread Ashutosh Dixit
From: Don Hiatt 

On GEN12+ use GEN12_RPSTAT register to get actual resolved GT
freq. GEN12_RPSTAT does not require a forcewake and will return 0 freq if
GT is in RC6.

v2:
  - Fixed review comments (Ashutosh)
  - Added function intel_rps_read_rpstat_fw to read RPSTAT without
forcewake, required especially for GEN6_RPSTAT1 (Ashutosh, Tvrtko)
v3:
  - Updated commit title and message for more clarity (Ashutosh)
  - Replaced intel_rps_read_rpstat with direct read to GEN12_RPSTAT1 in
read_cagf (Ashutosh)
v4: Remove GEN12_CAGF_SHIFT and use REG_FIELD_GET (Rodrigo)

Cc: Don Hiatt 
Cc: Andi Shyti 
Signed-off-by: Don Hiatt 
Signed-off-by: Badal Nilawar 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_rps.c | 32 +
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 ++
 drivers/gpu/drm/i915/i915_pmu.c |  3 +--
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 35c039573294c..f8c4f758ac0b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1539,6 +1539,7 @@
 
 #define GEN12_RPSTAT1  _MMIO(0x1381b4)
 #define   GEN12_VOLTAGE_MASK   REG_GENMASK(10, 0)
+#define   GEN12_CAGF_MASK  REG_GENMASK(19, 11)
 
 #define GEN11_GT_INTR_DW(x)   _MMIO(0x190018 + ((x) * 4))
 #define   GEN11_CSME   (31)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index 5bd6671554a6e..da6b969f554b6 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2068,12 +2068,34 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
 }
 
+u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   i915_reg_t rpstat;
+
+   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
+}
+
+u32 intel_rps_read_rpstat(struct intel_rps *rps)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   i915_reg_t rpstat;
+
+   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+   return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
+}
+
 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER(i915) >= 12)
+   cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = REG_FIELD_GET(RPE_MASK, rpstat);
else if (GRAPHICS_VER(i915) >= 9)
cagf = REG_FIELD_GET(GEN9_CAGF_MASK, rpstat);
@@ -2093,7 +2115,9 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+   if (GRAPHICS_VER(i915) >= 12) {
+   freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+   } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
@@ -2259,7 +2283,7 @@ static void rps_frequency_dump(struct intel_rps *rps, 
struct drm_printer *p)
rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
 
-   rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+   rpstat = intel_rps_read_rpstat(rps);
rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & 
GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & 
GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & 
GEN6_CURBSYTAVG_MASK;
@@ -2394,7 +2418,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, 
struct drm_printer *p)
drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
   rps->pm_intrmsk_mbz);
-   drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, 
GEN6_RPSTAT1));
+   drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
drm_printf(p, "RPNSWREQ: %dMHz\n", 
intel_rps_get_requested_frequency(rps));
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
   intel_gpu_freq(rps, caps.min_freq));
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index 110300dfd4383..9e1cad9ba0e9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_r

[PATCH 3/5] drm/i915/mtl: Modify CAGF functions for MTL

2022-10-19 Thread Ashutosh Dixit
From: Badal Nilawar 

Update CAGF functions for MTL to get actual resolved frequency of 3D and
SAMedia.

v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR)
Move MTL branches in cagf functions to top (MattR)
Fix commit message (Andi)
v3: Added comment about registers not needing forcewake for Gen12+ and
returning 0 freq in RC6
v4: Use REG_FIELD_GET and uncore (Rodrigo)

Bspec: 66300

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  4 
 drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index f8c4f758ac0b1..d8dbd0ac3b064 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -21,6 +21,10 @@
  */
 #define PERF_REG(offset)   _MMIO(offset)
 
+/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
+#define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
+#define   MTL_CAGF_MASK   REG_GENMASK(8, 0)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0   _MMIO(0xd00)
 #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT   3
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index da6b969f554b6..63cc7c538401e 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2093,7 +2093,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (GRAPHICS_VER(i915) >= 12)
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   cagf = REG_FIELD_GET(MTL_CAGF_MASK, rpstat);
+   else if (GRAPHICS_VER(i915) >= 12)
cagf = REG_FIELD_GET(GEN12_CAGF_MASK, rpstat);
else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = REG_FIELD_GET(RPE_MASK, rpstat);
@@ -2115,7 +2117,13 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (GRAPHICS_VER(i915) >= 12) {
+   /*
+* For Gen12+ reading freq from HW does not need a forcewake and
+* registers will return 0 freq when GT is in RC6
+*/
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+   freq = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+   } else if (GRAPHICS_VER(i915) >= 12) {
freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
-- 
2.38.0



[PATCH 0/5] i915: CAGF and RC6 changes for MTL

2022-10-19 Thread Ashutosh Dixit
This series includes the code changes to get CAGF, RC State and C6
Residency of MTL.

v3: Included "Use GEN12 RPSTAT register" patch

v4:
  - Rebased
  - Dropped "Use GEN12 RPSTAT register" patch from this series
going to send separate series for it

v5:
  - Included "drm/i915/gt: Change RC6 residency functions to accept register
ID's" based on code review feedback

v6:
  - Addressed Jani N's review comments on "drm/i915/gt: Change RC6 residency
functions to accept register ID's"
  - Re-add "drm/i915: Use GEN12_RPSTAT register for GT freq" to this series

v7: Rebuild, identical to v6

v8:
  - Add "drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf" to the series
(based on Rodrigo's review) to consistently use REG_FIELD_GET
  - Minor changes to other patches, please see individual patches for changelogs

Ashutosh Dixit (2):
  drm/i915/rps: Prefer REG_FIELD_GET in intel_rps_get_cagf
  drm/i915/gt: Use RC6 residency types as arguments to residency
functions

Badal Nilawar (2):
  drm/i915/mtl: Modify CAGF functions for MTL
  drm/i915/mtl: C6 residency and C state type for MTL SAMedia

Don Hiatt (1):
  drm/i915: Use GEN12_RPSTAT register for GT freq

 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 87 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 22 +++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 +--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 64 +-
 drivers/gpu/drm/i915/gt/intel_rc6.h   | 11 ++-
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 +++-
 drivers/gpu/drm/i915/gt/intel_rps.c   | 51 ---
 drivers/gpu/drm/i915/gt/intel_rps.h   |  2 +
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  9 +-
 10 files changed, 197 insertions(+), 82 deletions(-)

-- 
2.38.0



[PATCH 2/4] drm/i915/mtl: Modify CAGF functions for MTL

2022-10-18 Thread Ashutosh Dixit
From: Badal Nilawar 

Update CAGF functions for MTL to get actual resolved frequency of 3D and
SAMedia.

v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR)
Move MTL branches in cagf functions to top (MattR)
Fix commit message (Andi)
v3: Added comment about registers not needing forcewake for Gen12+ and
returning 0 freq in RC6

Bspec: 66300

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  4 
 drivers/gpu/drm/i915/gt/intel_rps.c | 12 ++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index a7a0129d0e3fc..b4b1b54ad738f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -21,6 +21,10 @@
  */
 #define PERF_REG(offset)   _MMIO(offset)
 
+/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
+#define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
+#define   MTL_CAGF_MASK  REG_GENMASK(8, 0)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0  _MMIO(0xd00)
 #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT  3
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index df21258976d86..5a743ae4dd11e 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2093,7 +2093,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (GRAPHICS_VER(i915) >= 12)
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   cagf = rpstat & MTL_CAGF_MASK;
+   else if (GRAPHICS_VER(i915) >= 12)
cagf = (rpstat & GEN12_CAGF_MASK) >> GEN12_CAGF_SHIFT;
else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = (rpstat >> 8) & 0xff;
@@ -2116,7 +2118,13 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (GRAPHICS_VER(i915) >= 12) {
+   /*
+* For Gen12+ reading freq from HW does not need a forcewake and
+* registers will return 0 freq when GT is in RC6
+*/
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+   freq = intel_uncore_read(rps_to_gt(rps)->uncore, 
MTL_MIRROR_TARGET_WP1);
+   } else if (GRAPHICS_VER(i915) >= 12) {
freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
} else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
-- 
2.38.0



[PATCH 4/4] drm/i915/mtl: C6 residency and C state type for MTL SAMedia

2022-10-18 Thread Ashutosh Dixit
From: Badal Nilawar 

Add support for C6 residency and C state type for MTL SAMedia. Also add
mtl_drpc.

v2: Fixed review comments (Ashutosh)
v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R)
Remove MTL_CC_SHIFT (Ashutosh)
Adapt to RC6 residency register code refactor (Jani N)
v4: Move MTL branch to top in drpc_show

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 60 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  5 ++
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 17 --
 3 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 235d593cfaeba..c88d8ec62b692 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -256,6 +256,62 @@ static int ilk_drpc(struct seq_file *m)
return 0;
 }
 
+static int mtl_drpc(struct seq_file *m)
+{
+   struct intel_gt *gt = m->private;
+   struct intel_uncore *uncore = gt->uncore;
+   u32 gt_core_status, rcctl1, global_forcewake;
+   u32 mtl_powergate_enable = 0, mtl_powergate_status = 0;
+
+   gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+
+   global_forcewake = intel_uncore_read(uncore, FORCEWAKE_GT_GEN9);
+
+   rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+   mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
+   mtl_powergate_status = intel_uncore_read(uncore,
+GEN9_PWRGT_DOMAIN_STATUS);
+
+   seq_printf(m, "RC6 Enabled: %s\n",
+  str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+   if (gt->type == GT_MEDIA) {
+   seq_printf(m, "Media Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_MEDIA_PG_ENABLE));
+   } else {
+   seq_printf(m, "Render Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_RENDER_PG_ENABLE));
+   }
+
+   seq_puts(m, "Current RC state: ");
+   switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) {
+   case MTL_CC0:
+   seq_puts(m, "on\n");
+   break;
+   case MTL_CC6:
+   seq_puts(m, "RC6\n");
+   break;
+   default:
+   seq_puts(m, "Unknown\n");
+   break;
+   }
+
+   if (gt->type == GT_MEDIA)
+   seq_printf(m, "Media Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+   else
+   seq_printf(m, "Render Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+
+   /* Works for both render and media gt's */
+   intel_rc6_print_residency(m, "RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+
+   seq_printf(m, "Global Forcewake Requests: 0x%x\n", global_forcewake);
+
+   return fw_domains_show(m, NULL);
+}
+
 static int drpc_show(struct seq_file *m, void *unused)
 {
struct intel_gt *gt = m->private;
@@ -264,7 +320,9 @@ static int drpc_show(struct seq_file *m, void *unused)
int err = -ENODEV;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   err = mtl_drpc(m);
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
err = vlv_drpc(m);
else if (GRAPHICS_VER(i915) >= 6)
err = gen6_drpc(m);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index b4b1b54ad738f..9f168867eb8ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -24,6 +24,9 @@
 /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
 #define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
 #define   MTL_CAGF_MASK  REG_GENMASK(8, 0)
+#define   MTL_CC0  0x0
+#define   MTL_CC6  0x3
+#define   MTL_CC_MASK  REG_GENMASK(12, 9)
 
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0  _MMIO(0xd00)
@@ -1516,6 +1519,8 @@
 #define FORCEWAKE_MEDIA_VLV  _MMIO(0x1300b8)
 #define FORCEWAKE_ACK_MEDIA_VLV  _MMIO(0x1300bc)
 
+#define MTL_MEDIA_MC6  _MMIO(0x138048)
+
 #define GEN6_

[PATCH 0/4] i915: CAGF and RC6 changes for MTL

2022-10-18 Thread Ashutosh Dixit
This series includes the code changes to get CAGF, RC State and C6
Residency of MTL.

v3: Included "Use GEN12 RPSTAT register" patch

v4:
  - Rebased
  - Dropped "Use GEN12 RPSTAT register" patch from this series
going to send separate series for it

v5:
- Included "drm/i915/gt: Change RC6 residency functions to accept register
  ID's" based on code review feedback

v6:
- Addressed Jani N's review comments on "drm/i915/gt: Change RC6 residency
  functions to accept register ID's"
- Minor changes to other patches, please see individual patches for changelogs

Ashutosh Dixit (1):
  drm/i915/gt: Use RC6 residency types as arguments to residency
functions

Badal Nilawar (2):
  drm/i915/mtl: Modify CAGF functions for MTL
  drm/i915/mtl: C6 residency and C state type for MTL SAMedia

Don Hiatt (1):
  drm/i915: Use GEN12_RPSTAT register for GT freq

 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 87 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 11 +++
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 +--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 64 +-
 drivers/gpu/drm/i915/gt/intel_rc6.h   |  8 +-
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 +++-
 drivers/gpu/drm/i915/gt/intel_rps.c   | 40 -
 drivers/gpu/drm/i915/gt/intel_rps.h   |  2 +
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  9 +-
 10 files changed, 188 insertions(+), 66 deletions(-)

-- 
2.38.0



[PATCH 1/4] drm/i915: Use GEN12_RPSTAT register for GT freq

2022-10-18 Thread Ashutosh Dixit
From: Don Hiatt 

On GEN12+ use GEN12_RPSTAT register to get actual resolved GT
freq. GEN12_RPSTAT does not require a forcewake and will return 0 freq if
GT is in RC6.

v2:
  - Fixed review comments (Ashutosh)
  - Added function intel_rps_read_rpstat_fw to read RPSTAT without
forcewake, required especially for GEN6_RPSTAT1 (Ashutosh, Tvrtko)
v3:
  - Updated commit title and message for more clarity (Ashutosh)
  - Replaced intel_rps_read_rpstat with direct read to GEN12_RPSTAT1 in
read_cagf (Ashutosh)

Cc: Don Hiatt 
Cc: Andi Shyti 
Signed-off-by: Don Hiatt 
Signed-off-by: Badal Nilawar 
Signed-off-by: Ashutosh Dixit 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
 drivers/gpu/drm/i915/gt/intel_rps.c | 32 +
 drivers/gpu/drm/i915/gt/intel_rps.h |  2 ++
 drivers/gpu/drm/i915/i915_pmu.c |  3 +--
 4 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 36d95b79022c0..a7a0129d0e3fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1543,6 +1543,8 @@
 
 #define GEN12_RPSTAT1  _MMIO(0x1381b4)
 #define   GEN12_VOLTAGE_MASK   REG_GENMASK(10, 0)
+#define   GEN12_CAGF_SHIFT 11
+#define   GEN12_CAGF_MASK  REG_GENMASK(19, 11)
 
 #define GEN11_GT_INTR_DW(x)  _MMIO(0x190018 + ((x) * 4))
 #define   GEN11_CSME   (31)
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2a..df21258976d86 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2068,12 +2068,34 @@ void intel_rps_sanitize(struct intel_rps *rps)
rps_disable_interrupts(rps);
 }
 
+u32 intel_rps_read_rpstat_fw(struct intel_rps *rps)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   i915_reg_t rpstat;
+
+   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+   return intel_uncore_read_fw(rps_to_gt(rps)->uncore, rpstat);
+}
+
+u32 intel_rps_read_rpstat(struct intel_rps *rps)
+{
+   struct drm_i915_private *i915 = rps_to_i915(rps);
+   i915_reg_t rpstat;
+
+   rpstat = (GRAPHICS_VER(i915) >= 12) ? GEN12_RPSTAT1 : GEN6_RPSTAT1;
+
+   return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
+}
+
 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
 {
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER(i915) >= 12)
+   cagf = (rpstat & GEN12_CAGF_MASK) >> GEN12_CAGF_SHIFT;
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = (rpstat >> 8) & 0xff;
else if (GRAPHICS_VER(i915) >= 9)
cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
@@ -2094,7 +2116,9 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+   if (GRAPHICS_VER(i915) >= 12) {
+   freq = intel_uncore_read(uncore, GEN12_RPSTAT1);
+   } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
@@ -2260,7 +2284,7 @@ static void rps_frequency_dump(struct intel_rps *rps, 
struct drm_printer *p)
rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
 
-   rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+   rpstat = intel_rps_read_rpstat(rps);
rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & 
GEN6_CURICONT_MASK;
rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & 
GEN6_CURBSYTAVG_MASK;
rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & 
GEN6_CURBSYTAVG_MASK;
@@ -2395,7 +2419,7 @@ static void slpc_frequency_dump(struct intel_rps *rps, 
struct drm_printer *p)
drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
   rps->pm_intrmsk_mbz);
-   drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, 
GEN6_RPSTAT1));
+   drm_printf(p, "RPSTAT1: 0x%08x\n", intel_rps_read_rpstat(rps));
drm_printf(p, "RPNSWREQ: %dMHz\n", 
intel_rps_get_requested_frequency(rps));
drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
   intel_gpu_freq(rps, caps.min_freq));
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
b/drivers/gpu/drm/i915/gt/intel_rps.h
index 110300dfd4383..9e1cad9ba0e9c 100644
--- a/d

[PATCH 3/4] drm/i915/gt: Use RC6 residency types as arguments to residency functions

2022-10-18 Thread Ashutosh Dixit
Previously RC6 residency functions directly accepted RC6 residency register
MMIO offsets (there are four RC6 residency registers). This worked but
required an assumption on the residency register layout so was not future
proof.

Therefore change RC6 residency functions to accept RC6 residency types
instead of register MMIO offsets. The knowledge of register offsets as well
as ID to offset mapping is now maintained solely in intel_rc6 and can be
tailored for different platforms and different register layouts as need
arises.

v2: Address review comments by Jani N
- Change residency functions to accept RC6 residency types instead of
  register ID's
- s/intel_rc6_print_rc6_res/intel_rc6_print_residency/
- Remove "const enum" in function arguments
- Naming: intel_rc6_* for enum
- Use INTEL_RC6_RES_MAX and other minor changes

Suggested-by: Rodrigo Vivi 
Suggested-by: Jani Nikula 
Reported-by: Jani Nikula 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 ++--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 55 +++
 drivers/gpu/drm/i915/gt/intel_rc6.h   |  8 ++-
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 15 -
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  6 +-
 7 files changed, 70 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 40d0a3be42acf..235d593cfaeba 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data)
 }
 DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
 
-static void print_rc6_res(struct seq_file *m,
- const char *title,
- const i915_reg_t reg)
-{
-   struct intel_gt *gt = m->private;
-   intel_wakeref_t wakeref;
-
-   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   seq_printf(m, "%s %u (%llu us)\n", title,
-  intel_uncore_read(gt->uncore, reg),
-  intel_rc6_residency_us(&gt->rc6, reg));
-}
-
 static int vlv_drpc(struct seq_file *m)
 {
struct intel_gt *gt = m->private;
@@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m)
seq_printf(m, "Media Power Well: %s\n",
   (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
 
-   print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+   intel_rc6_print_residency(m, "Render RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+   intel_rc6_print_residency(m, "Media RC6 residency since boot:", 
INTEL_RC6_RES_VLV_MEDIA);
 
return fw_domains_show(m, NULL);
 }
@@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m)
}
 
/* Not exactly sure what this is */
-   print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
- GEN6_GT_GFX_RC6_LOCKED);
-   print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
-   print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+   intel_rc6_print_residency(m, "RC6 \"Locked to RPn\" residency since 
boot:",
+ INTEL_RC6_RES_RC6_LOCKED);
+   intel_rc6_print_residency(m, "RC6 residency since boot:", 
INTEL_RC6_RES_RC6);
+   intel_rc6_print_residency(m, "RC6+ residency since boot:", 
INTEL_RC6_RES_RC6p);
+   intel_rc6_print_residency(m, "RC6++ residency since boot:", 
INTEL_RC6_RES_RC6pp);
 
if (GRAPHICS_VER(i915) <= 7) {
seq_printf(m, "RC6   voltage: %dmV\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 9041609523697..19a6e052c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct 
device_attribute *attr,
sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
 
 #ifdef CONFIG_PM
-static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
+static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id)
 {
intel_wakeref_t wakeref;
u64 res = 0;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   res = intel_rc6_residency_us(&gt->rc6, reg);
+   res = intel_rc6_residency_us(&gt->rc6, id);
 
retur

[PATCH 3/3] drm/i915/mtl: C6 residency and C state type for MTL SAMedia

2022-10-14 Thread Ashutosh Dixit
From: Badal Nilawar 

Add support for C6 residency and C state type for MTL SAMedia. Also add
mtl_drpc.

v2: Fixed review comments (Ashutosh)
v3: Sort registers and fix whitespace errors in intel_gt_regs.h (Matt R)
Remove MTL_CC_SHIFT (Ashutosh)
Adapt to RC6 residency register code refactor (Jani N)

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 57 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  5 ++
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 17 --
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 1fb053cbf52db..3a9bb4387248e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -256,6 +256,61 @@ static int ilk_drpc(struct seq_file *m)
return 0;
 }
 
+static int mtl_drpc(struct seq_file *m)
+{
+   struct intel_gt *gt = m->private;
+   struct intel_uncore *uncore = gt->uncore;
+   u32 gt_core_status, rcctl1, global_forcewake;
+   u32 mtl_powergate_enable = 0, mtl_powergate_status = 0;
+
+   gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
+
+   global_forcewake = intel_uncore_read(uncore, FORCEWAKE_GT_GEN9);
+
+   rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+   mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
+   mtl_powergate_status = intel_uncore_read(uncore,
+GEN9_PWRGT_DOMAIN_STATUS);
+
+   seq_printf(m, "RC6 Enabled: %s\n",
+  str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+   if (gt->type == GT_MEDIA) {
+   seq_printf(m, "Media Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_MEDIA_PG_ENABLE));
+   } else {
+   seq_printf(m, "Render Well Gating Enabled: %s\n",
+  str_yes_no(mtl_powergate_enable & 
GEN9_RENDER_PG_ENABLE));
+   }
+
+   seq_puts(m, "Current RC state: ");
+   switch (REG_FIELD_GET(MTL_CC_MASK, gt_core_status)) {
+   case MTL_CC0:
+   seq_puts(m, "on\n");
+   break;
+   case MTL_CC6:
+   seq_puts(m, "RC6\n");
+   break;
+   default:
+   seq_puts(m, "Unknown\n");
+   break;
+   }
+
+   if (gt->type == GT_MEDIA)
+   seq_printf(m, "Media Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+   else
+   seq_printf(m, "Render Power Well: %s\n",
+  (mtl_powergate_status &
+   GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+
+   intel_rc6_print_rc6_res(m, "RC6 residency since boot:", 
RC6_RES_REG_RC6);
+
+   seq_printf(m, "Global Forcewake Requests: 0x%x\n", global_forcewake);
+
+   return fw_domains_show(m, NULL);
+}
+
 static int drpc_show(struct seq_file *m, void *unused)
 {
struct intel_gt *gt = m->private;
@@ -266,6 +321,8 @@ static int drpc_show(struct seq_file *m, void *unused)
with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
err = vlv_drpc(m);
+   else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   err = mtl_drpc(m);
else if (GRAPHICS_VER(i915) >= 6)
err = gen6_drpc(m);
else
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 66867747f643e..0493ea324b846 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -11,6 +11,9 @@
 /* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
 #define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
 #define   MTL_CAGF_MASK  REG_GENMASK(8, 0)
+#define   MTL_CC0  0x0
+#define   MTL_CC6  0x3
+#define   MTL_CC_MASK  REG_GENMASK(12, 9)
 
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0  _MMIO(0xd00)
@@ -1494,6 +1497,8 @@
 #define FORCEWAKE_MEDIA_VLV  _MMIO(0x1300b8)
 #define FORCEWAKE_ACK_MEDIA_VLV  _MMIO(0x1300bc)
 
+#define MTL_MEDIA_MC6  _MMIO(0x138048)
+
 #define GEN6_GT_THREAD_STATUS_REG  _MMIO(0x13805c)
 #define   GEN6_GT_THREAD_STATUS_CORE_MASK  0x7
 
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c 

[PATCH 2/3] drm/i915/mtl: Modify CAGF functions for MTL

2022-10-14 Thread Ashutosh Dixit
From: Badal Nilawar 

Update CAGF functions for MTL to get actual resolved frequency of 3D and
SAMedia.

v2: Update MTL_MIRROR_TARGET_WP1 position/formatting (MattR)
Move MTL branches in cagf functions to top (MattR)
Fix commit message (Andi)

Bspec: 66300

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 
 drivers/gpu/drm/i915/gt/intel_rps.c | 8 ++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 7f79bbf978284..66867747f643e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -8,6 +8,10 @@
 
 #include "i915_reg_defs.h"
 
+/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */
+#define MTL_MIRROR_TARGET_WP1  _MMIO(0xc60)
+#define   MTL_CAGF_MASK  REG_GENMASK(8, 0)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0  _MMIO(0xd00)
 #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT  3
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index fc23c562d9b2a..59ca7e80e4c6f 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -2073,7 +2073,9 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 cagf;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   cagf = rpstat & MTL_CAGF_MASK;
+   else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
cagf = (rpstat >> 8) & 0xff;
else if (GRAPHICS_VER(i915) >= 9)
cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
@@ -2094,7 +2096,9 @@ static u32 read_cagf(struct intel_rps *rps)
struct intel_uncore *uncore = rps_to_uncore(rps);
u32 freq;
 
-   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+   freq = intel_uncore_read(rps_to_gt(rps)->uncore, 
MTL_MIRROR_TARGET_WP1);
+   } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
vlv_punit_get(i915);
freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
vlv_punit_put(i915);
-- 
2.38.0



[PATCH 0/3] i915: CAGF and RC6 changes for MTL

2022-10-14 Thread Ashutosh Dixit
This series includes the code changes to get CAGF, RC State and C6
Residency of MTL.

v2: Included "Use GEN12 RPSTAT register" patch

v3:
  - Rebased
  - Dropped "Use GEN12 RPSTAT register" patch from this series
going to send separate series for it

v4:
- Included "drm/i915/gt: Change RC6 residency functions to accept register
  ID's" based on code review feedback
- Addressed review comments, please see individual patches for changelogs

Ashutosh Dixit (1):
  drm/i915/gt: Change RC6 residency functions to accept register ID's

Badal Nilawar (2):
  drm/i915/mtl: Modify CAGF functions for MTL
  drm/i915/mtl: C6 residency and C state type for MTL SAMedia

 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 84 ++-
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  9 ++
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 +--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 65 +-
 drivers/gpu/drm/i915/gt/intel_rc6.h   |  9 +-
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 10 +++
 drivers/gpu/drm/i915/gt/intel_rps.c   |  8 +-
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  6 +-
 9 files changed, 150 insertions(+), 59 deletions(-)

-- 
2.38.0



[PATCH 1/3] drm/i915/gt: Change RC6 residency functions to accept register ID's

2022-10-14 Thread Ashutosh Dixit
Previously RC6 residency functions directly accepted RC6 residency register
MMIO offsets (there are four RC6 residency registers). This worked but
required an assumption on the residency register layout so was not future
proof.

Therefore change RC6 residency functions to accept register ID's instead of
register MMIO offsets. The knowledge of register offsets as well as ID to
offset mapping is now maintained solely in intel_rc6 and can be tailored
for different platforms and different register layouts as need arises.

Suggested-by: Rodrigo Vivi 
Reported-by: Jani Nikula 
Signed-off-by: Ashutosh Dixit 
---
 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++--
 drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 12 ++--
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 56 +++
 drivers/gpu/drm/i915/gt/intel_rc6.h   |  9 ++-
 drivers/gpu/drm/i915/gt/intel_rc6_types.h | 10 
 drivers/gpu/drm/i915/gt/selftest_rc6.c|  6 +-
 drivers/gpu/drm/i915/i915_pmu.c   |  6 +-
 7 files changed, 69 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 40d0a3be42acf..1fb053cbf52db 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -83,19 +83,6 @@ static int fw_domains_show(struct seq_file *m, void *data)
 }
 DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
 
-static void print_rc6_res(struct seq_file *m,
- const char *title,
- const i915_reg_t reg)
-{
-   struct intel_gt *gt = m->private;
-   intel_wakeref_t wakeref;
-
-   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   seq_printf(m, "%s %u (%llu us)\n", title,
-  intel_uncore_read(gt->uncore, reg),
-  intel_rc6_residency_us(&gt->rc6, reg));
-}
-
 static int vlv_drpc(struct seq_file *m)
 {
struct intel_gt *gt = m->private;
@@ -115,8 +102,8 @@ static int vlv_drpc(struct seq_file *m)
seq_printf(m, "Media Power Well: %s\n",
   (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
 
-   print_rc6_res(m, "Render RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+   intel_rc6_print_rc6_res(m, "Render RC6 residency since boot:", 
RC6_RES_REG_RC6);
+   intel_rc6_print_rc6_res(m, "Media RC6 residency since boot:", 
VLV_RC6_RES_REG_MEDIA_RC6);
 
return fw_domains_show(m, NULL);
 }
@@ -192,11 +179,11 @@ static int gen6_drpc(struct seq_file *m)
}
 
/* Not exactly sure what this is */
-   print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
- GEN6_GT_GFX_RC6_LOCKED);
-   print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
-   print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
-   print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+   intel_rc6_print_rc6_res(m, "RC6 \"Locked to RPn\" residency since 
boot:",
+   RC6_RES_REG_RC6_LOCKED);
+   intel_rc6_print_rc6_res(m, "RC6 residency since boot:", 
RC6_RES_REG_RC6);
+   intel_rc6_print_rc6_res(m, "RC6+ residency since boot:", 
RC6_RES_REG_RC6p);
+   intel_rc6_print_rc6_res(m, "RC6++ residency since boot:", 
RC6_RES_REG_RC6pp);
 
if (GRAPHICS_VER(i915) <= 7) {
seq_printf(m, "RC6   voltage: %dmV\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 9041609523697..1ce5bfdc72282 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -93,13 +93,13 @@ sysfs_gt_attribute_r_func(struct device *dev, struct 
device_attribute *attr,
sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
 
 #ifdef CONFIG_PM
-static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
+static u32 get_residency(struct intel_gt *gt, const enum rc6_res_reg id)
 {
intel_wakeref_t wakeref;
u64 res = 0;
 
with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-   res = intel_rc6_residency_us(&gt->rc6, reg);
+   res = intel_rc6_residency_us(&gt->rc6, id);
 
return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
@@ -123,7 +123,7 @@ static ssize_t rc6_enable_show(struct device *dev,
 
 static u32 __rc6_residency_ms_show(struct intel_gt *gt)
 {
-   return get_residency(gt, GEN6_GT_GFX_RC6);
+   return get_residency(gt, RC6_RES_REG_RC6);
 }
 
 static ssize_t rc6_residency_ms_show(struct device *dev,
@@ -138,7 +138,7 @@ static ssize_t rc6_residency_ms_show(struct dev

[PATCH 0/7] drm/i915: Add HWMON support

2022-10-13 Thread Ashutosh Dixit
This series adds HWMON support for DGFX.

Test-with: 20221013151400.2086268-1-ashutosh.di...@intel.com

v2:
  - Reorganized series. Created first patch as infrastructure patch
followed by feature patches. (Ashutosh)
  - Fixed review comments (Jani)
  - Fixed review comments (Ashutosh)

v3:
  - Fixed review comments from Guenter
  - Exposed energy interface as standard hwmon interface (Ashutosh)
  - For power interface added entries for critical power and maintained
standard interface for all the entries except
power1_max_interval
  - Extended support for XEHPSDV (Ashutosh)

v4:
  - Fixed review comment from Guenter
  - Cleaned up unused code

v5:
  - Fixed review comments (Jani)

v6:
  - Fixed review comments (Ashutosh)
  - Updated date and kernel version in documentation

v7:
  - Fixed review comments (Anshuman)
  - KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko)

v8: s/hwmon_device_register_with_info/
  devm_hwmon_device_register_with_info/ (Ashutosh)

v9: Addressed review comments from Rodrigo and Andi

Ashutosh Dixit (2):
  drm/i915/hwmon: Expose card reactive critical power
  drm/i915/hwmon: Expose power1_max_interval

Dale B Stimson (4):
  drm/i915/hwmon: Add HWMON infrastructure
  drm/i915/hwmon: Power PL1 limit and TDP setting
  drm/i915/hwmon: Show device level energy usage
  drm/i915/hwmon: Extend power/energy for XEHPSDV

Riana Tauro (1):
  drm/i915/hwmon: Add HWMON current voltage support

 .../ABI/testing/sysfs-driver-intel-i915-hwmon |  75 ++
 MAINTAINERS   |   1 +
 drivers/gpu/drm/i915/Makefile |   3 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |   8 +
 drivers/gpu/drm/i915/i915_driver.c|   5 +
 drivers/gpu/drm/i915/i915_drv.h   |   2 +
 drivers/gpu/drm/i915/i915_hwmon.c | 738 ++
 drivers/gpu/drm/i915/i915_hwmon.h |  20 +
 drivers/gpu/drm/i915/i915_reg.h   |   6 +
 drivers/gpu/drm/i915/intel_mchbar_regs.h  |  21 +
 10 files changed, 879 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
 create mode 100644 drivers/gpu/drm/i915/i915_hwmon.c
 create mode 100644 drivers/gpu/drm/i915/i915_hwmon.h

-- 
2.38.0



[PATCH 1/7] drm/i915/hwmon: Add HWMON infrastructure

2022-10-13 Thread Ashutosh Dixit
From: Dale B Stimson 

The i915 HWMON module will be used to expose voltage, power and energy
values for dGfx. Here we set up i915 hwmon infrastructure including i915
hwmon registration, basic data structures and functions.

v2:
  - Create HWMON infra patch (Ashutosh)
  - Fixed review comments (Jani)
  - Remove "select HWMON" from i915/Kconfig (Jani)
v3: Use hwm_ prefix for static functions (Ashutosh)
v4: s/#ifdef CONFIG_HWMON/#if IS_REACHABLE(CONFIG_HWMON)/ since the former
doesn't work if hwmon is compiled as a module (Guenter)
v5: Fixed review comments (Jani)
v6: s/kzalloc/devm_kzalloc/ (Andi)
v7: s/hwmon_device_register_with_info/
  devm_hwmon_device_register_with_info/ (Ashutosh)

Cc: Guenter Roeck 
Signed-off-by: Dale B Stimson 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Riana Tauro 
Signed-off-by: Badal Nilawar 
Acked-by: Guenter Roeck 
Reviewed-by: Ashutosh Dixit 
Reviewed-by: Anshuman Gupta 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/Makefile  |   3 +
 drivers/gpu/drm/i915/i915_driver.c |   5 ++
 drivers/gpu/drm/i915/i915_drv.h|   2 +
 drivers/gpu/drm/i915/i915_hwmon.c  | 122 +
 drivers/gpu/drm/i915/i915_hwmon.h  |  20 +
 5 files changed, 152 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_hwmon.c
 create mode 100644 drivers/gpu/drm/i915/i915_hwmon.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8cc1eb52626e..2535593ab379e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -209,6 +209,9 @@ i915-y += gt/uc/intel_uc.o \
 # graphics system controller (GSC) support
 i915-y += gt/intel_gsc.o
 
+# graphics hardware monitoring (HWMON) support
+i915-$(CONFIG_HWMON) += i915_hwmon.o
+
 # modesetting core code
 i915-y += \
display/hsw_ips.o \
diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index 24d3d2d85fd57..49868dc51 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -81,6 +81,7 @@
 #include "i915_drm_client.h"
 #include "i915_drv.h"
 #include "i915_getparam.h"
+#include "i915_hwmon.h"
 #include "i915_ioc32.h"
 #include "i915_ioctl.h"
 #include "i915_irq.h"
@@ -763,6 +764,8 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
for_each_gt(gt, dev_priv, i)
intel_gt_driver_register(gt);
 
+   i915_hwmon_register(dev_priv);
+
intel_display_driver_register(dev_priv);
 
intel_power_domains_enable(dev_priv);
@@ -795,6 +798,8 @@ static void i915_driver_unregister(struct drm_i915_private 
*dev_priv)
for_each_gt(gt, dev_priv, i)
intel_gt_driver_unregister(gt);
 
+   i915_hwmon_unregister(dev_priv);
+
i915_perf_unregister(dev_priv);
i915_pmu_unregister(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 90ed8e6db2fe0..a81372ddd2db7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -349,6 +349,8 @@ struct drm_i915_private {
 
struct i915_perf perf;
 
+   struct i915_hwmon *hwmon;
+
/* Abstract the submission mechanism (legacy ringbuffer or execlists) 
away */
struct intel_gt gt0;
 
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
new file mode 100644
index 0..231552fda374a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include "i915_drv.h"
+#include "i915_hwmon.h"
+#include "i915_reg.h"
+#include "intel_mchbar_regs.h"
+
+struct hwm_reg {
+};
+
+struct hwm_drvdata {
+   struct i915_hwmon *hwmon;
+   struct intel_uncore *uncore;
+   struct device *hwmon_dev;
+   char name[12];
+};
+
+struct i915_hwmon {
+   struct hwm_drvdata ddat;
+   struct mutex hwmon_lock;/* counter overflow logic and 
rmw */
+   struct hwm_reg rg;
+};
+
+static const struct hwmon_channel_info *hwm_info[] = {
+   NULL
+};
+
+static umode_t
+hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+  u32 attr, int channel)
+{
+   switch (type) {
+   default:
+   return 0;
+   }
+}
+
+static int
+hwm_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+int channel, long *val)
+{
+   switch (type) {
+   default:
+   return -EOPNOTSUPP;
+   }
+}
+
+static int
+hwm_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+ int channel, long val)
+{
+   switch (type) {
+   default:
+   return -EOPNOTSUPP;
+   }
+}
+
+static const struct hwmon_ops hwm_ops = {
+   .is_visible = hwm_is_visible,
+   .read = hwm_read,
+  

[PATCH 4/7] drm/i915/hwmon: Show device level energy usage

2022-10-13 Thread Ashutosh Dixit
From: Dale B Stimson 

Use i915 HWMON to display device level energy input.

v2: Updated the date and kernel version in feature description
v3:
  - Cleaned up hwm_energy function and removed unused function
i915_hwmon_energy_status_get (Ashutosh)
v4: KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko)
v5: Change contact to intel-gfx (Rodrigo)
Change return type of hwm_energy to void (Andi)

Signed-off-by: Dale B Stimson 
Signed-off-by: Ashutosh Dixit 
Signed-off-by: Riana Tauro 
Signed-off-by: Badal Nilawar 
Acked-by: Guenter Roeck 
Reviewed-by: Ashutosh Dixit 
Reviewed-by: Anshuman Gupta 
---
 .../ABI/testing/sysfs-driver-intel-i915-hwmon |   8 ++
 drivers/gpu/drm/i915/i915_hwmon.c | 106 +-
 drivers/gpu/drm/i915/intel_mchbar_regs.h  |   2 +
 3 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index e0b1af4ec6d84..aa00c558495b3 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -25,3 +25,11 @@ Contact: intel-...@lists.freedesktop.org
 Description:   RO. Card default power limit (default TDP setting).
 
Only supported for particular Intel i915 graphics platforms.
+
+What:  /sys/devices/.../hwmon/hwmon/energy1_input
+Date:  February 2023
+KernelVersion: 6.2
+Contact:   intel-...@lists.freedesktop.org
+Description:   RO. Energy input of device in microjoules.
+
+   Only supported for particular Intel i915 graphics platforms.
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index db254413a07da..d8d30daa37944 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -17,21 +17,30 @@
  * SF_* - scale factors for particular quantities according to hwmon spec.
  * - voltage  - millivolts
  * - power  - microwatts
+ * - energy - microjoules
  */
 #define SF_VOLTAGE 1000
 #define SF_POWER   100
+#define SF_ENERGY  100
 
 struct hwm_reg {
i915_reg_t gt_perf_status;
i915_reg_t pkg_power_sku_unit;
i915_reg_t pkg_power_sku;
i915_reg_t pkg_rapl_limit;
+   i915_reg_t energy_status_all;
+};
+
+struct hwm_energy_info {
+   u32 reg_val_prev;
+   long accum_energy;  /* Accumulated energy for 
energy1_input */
 };
 
 struct hwm_drvdata {
struct i915_hwmon *hwmon;
struct intel_uncore *uncore;
struct device *hwmon_dev;
+   struct hwm_energy_info ei;  /*  Energy info for 
energy1_input */
char name[12];
 };
 
@@ -40,6 +49,7 @@ struct i915_hwmon {
struct mutex hwmon_lock;/* counter overflow logic and 
rmw */
struct hwm_reg rg;
int scl_shift_power;
+   int scl_shift_energy;
 };
 
 static void
@@ -98,9 +108,58 @@ hwm_field_scale_and_write(struct hwm_drvdata *ddat, 
i915_reg_t rgadr,
bits_to_clear, bits_to_set);
 }
 
+/*
+ * hwm_energy - Obtain energy value
+ *
+ * The underlying energy hardware register is 32-bits and is subject to
+ * overflow. How long before overflow? For example, with an example
+ * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and
+ * a power draw of 1000 watts, the 32-bit counter will overflow in
+ * approximately 4.36 minutes.
+ *
+ * Examples:
+ *1 watt:  (2^32 >> 14) /1 W / (60 * 60 * 24) secs/day -> 3 days
+ * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes
+ *
+ * The function significantly increases overflow duration (from 4.36
+ * minutes) by accumulating the energy register into a 'long' as allowed by
+ * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
+ * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
+ * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
+ * energy1_input overflows. This at 1000 W is an overflow duration of 278 
years.
+ */
+static void
+hwm_energy(struct hwm_drvdata *ddat, long *energy)
+{
+   struct intel_uncore *uncore = ddat->uncore;
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   struct hwm_energy_info *ei = >ei;
+   intel_wakeref_t wakeref;
+   i915_reg_t rgaddr;
+   u32 reg_val;
+
+   rgaddr = hwmon->rg.energy_status_all;
+
+   mutex_lock(>hwmon_lock);
+
+   with_intel_runtime_pm(uncore->rpm, wakeref)
+   reg_val = intel_uncore_read(uncore, rgaddr);
+
+   if (reg_val >= ei->reg_val_prev)
+   ei->accum_energy += reg_val - ei->reg_val_prev;
+   else
+   ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
+   ei->reg_val_prev = reg_val;
+
+   *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+

[PATCH 2/7] drm/i915/hwmon: Add HWMON current voltage support

2022-10-13 Thread Ashutosh Dixit
From: Riana Tauro 

Use i915 HWMON subsystem to display current input voltage.

v2:
  - Updated date and kernel version in feature description
  - Fixed review comments (Ashutosh)
v3: Use macro HWMON_CHANNEL_INFO to define hwmon channel (Guenter)
v4:
  - Fixed review comments (Ashutosh)
  - Use hwm_ prefix for static functions (Ashutosh)
v5: Added unit of voltage as millivolts (Ashutosh)
v6: KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko)
v7: Change contact to intel-gfx (Rodrigo)
GEN12_RPSTAT1 is available for all Gen12+ (Andi)
Added Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
to MAINTAINERS

Cc: Guenter Roeck 
Cc: Anshuman Gupta 
Signed-off-by: Riana Tauro 
Signed-off-by: Badal Nilawar 
Signed-off-by: Ashutosh Dixit 
Acked-by: Guenter Roeck 
Reviewed-by: Ashutosh Dixit 
Reviewed-by: Anshuman Gupta 
---
 .../ABI/testing/sysfs-driver-intel-i915-hwmon |  7 +++
 MAINTAINERS   |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  3 ++
 drivers/gpu/drm/i915/i915_hwmon.c | 53 +++
 4 files changed, 64 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
new file mode 100644
index 0..5f4b136f08509
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -0,0 +1,7 @@
+What:  /sys/devices/.../hwmon/hwmon/in0_input
+Date:  February 2023
+KernelVersion: 6.2
+Contact:   intel-...@lists.freedesktop.org
+Description:   RO. Current Voltage in millivolt.
+
+   Only supported for particular Intel i915 graphics platforms.
diff --git a/MAINTAINERS b/MAINTAINERS
index f07a8bf8744f9..7d57ede980940 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10144,6 +10144,7 @@ Q:  
http://patchwork.freedesktop.org/project/intel-gfx/
 B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
 C: irc://irc.oftc.net/intel-gfx
 T: git git://anongit.freedesktop.org/drm-intel
+F: Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
 F: Documentation/gpu/i915.rst
 F: drivers/gpu/drm/i915/
 F: include/drm/i915*
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 7f79bbf978284..fcf5f9012852f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1519,6 +1519,9 @@
 #define VLV_RENDER_C0_COUNT_MMIO(0x138118)
 #define VLV_MEDIA_C0_COUNT _MMIO(0x13811c)
 
+#define GEN12_RPSTAT1  _MMIO(0x1381b4)
+#define   GEN12_VOLTAGE_MASK   REG_GENMASK(10, 0)
+
 #define GEN11_GT_INTR_DW(x)_MMIO(0x190018 + ((x) * 4))
 #define   GEN11_CSME   (31)
 #define   GEN11_GUNIT  (28)
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 231552fda374a..025399391ddcc 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -11,8 +11,16 @@
 #include "i915_hwmon.h"
 #include "i915_reg.h"
 #include "intel_mchbar_regs.h"
+#include "gt/intel_gt_regs.h"
+
+/*
+ * SF_* - scale factors for particular quantities according to hwmon spec.
+ * - voltage  - millivolts
+ */
+#define SF_VOLTAGE 1000
 
 struct hwm_reg {
+   i915_reg_t gt_perf_status;
 };
 
 struct hwm_drvdata {
@@ -29,14 +37,51 @@ struct i915_hwmon {
 };
 
 static const struct hwmon_channel_info *hwm_info[] = {
+   HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
NULL
 };
 
+static umode_t
+hwm_in_is_visible(const struct hwm_drvdata *ddat, u32 attr)
+{
+   struct drm_i915_private *i915 = ddat->uncore->i915;
+
+   switch (attr) {
+   case hwmon_in_input:
+   return IS_DG1(i915) || IS_DG2(i915) ? 0444 : 0;
+   default:
+   return 0;
+   }
+}
+
+static int
+hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val)
+{
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   intel_wakeref_t wakeref;
+   u32 reg_value;
+
+   switch (attr) {
+   case hwmon_in_input:
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   reg_value = intel_uncore_read(ddat->uncore, 
hwmon->rg.gt_perf_status);
+   /* HW register value in units of 2.5 millivolt */
+   *val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, 
reg_value) * 25, 10);
+   return 0;
+   default:
+   return -EOPNOTSUPP;
+   }
+}
+
 static umode_t
 hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type,
   u32 attr, int channel)
 {
+   struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
+
switch (type) {
+   case hwmon_in:
+   

[PATCH 6/7] drm/i915/hwmon: Expose power1_max_interval

2022-10-13 Thread Ashutosh Dixit
Expose power1_max_interval, that is the tau corresponding to PL1, as a
custom hwmon attribute. Some bit manipulation is needed because of the
format of PKG_PWR_LIM_1_TIME in
GT0_PACKAGE_RAPL_LIMIT register (1.x * power(2,y)).

v2: Update date and kernel version in Documentation (Badal)
v3: Cleaned up hwm_power1_max_interval_store() (Badal)
v4:
  - Fixed review comments (Anshuman)
  - In hwm_power1_max_interval_store() get PKG_MAX_WIN from
pkg_power_sku when it is valid (Ashutosh)
  - KernelVersion: 6.2, Date: February 2023 in doc (Tvrtko)
v5: On some of the DGFX setups it is seen that although pkg_power_sku
is valid the field PKG_MAX_WIN is not populated. So it is
decided to stick to the default value of PKG_MAX_WIN (Ashutosh)
v6: Change contact to intel-gfx (Rodrigo)
Fixed variable types in hwm_power1_max_interval_store (Andi)
Documented PKG_MAX_WIN_DEFAULT (Andi)
Removed else in hwm_attributes_visible (Andi)

Signed-off-by: Ashutosh Dixit 
Signed-off-by: Badal Nilawar 
Acked-by: Guenter Roeck 
Reviewed-by: Anshuman Gupta 
---
 .../ABI/testing/sysfs-driver-intel-i915-hwmon |   9 ++
 drivers/gpu/drm/i915/i915_hwmon.c | 119 +-
 drivers/gpu/drm/i915/intel_mchbar_regs.h  |   7 ++
 3 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
index a7a6512fcc8ca..9dc5ff14107bb 100644
--- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -26,6 +26,15 @@ Description: RO. Card default power limit (default TDP 
setting).
 
Only supported for particular Intel i915 graphics platforms.
 
+What:  /sys/devices/.../hwmon/hwmon/power1_max_interval
+Date:  February 2023
+KernelVersion: 6.2
+Contact:   intel-...@lists.freedesktop.org
+Description:   RW. Sustained power limit interval (Tau in PL1/Tau) in
+   milliseconds over which sustained power is averaged.
+
+   Only supported for particular Intel i915 graphics platforms.
+
 What:  /sys/devices/.../hwmon/hwmon/power1_crit
 Date:  February 2023
 KernelVersion: 6.2
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
b/drivers/gpu/drm/i915/i915_hwmon.c
index 2b24a7a711400..58f80380e5427 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -20,11 +20,13 @@
  * - power  - microwatts
  * - curr   - milliamperes
  * - energy - microjoules
+ * - time   - milliseconds
  */
 #define SF_VOLTAGE 1000
 #define SF_POWER   100
 #define SF_CURR1000
 #define SF_ENERGY  100
+#define SF_TIME1000
 
 struct hwm_reg {
i915_reg_t gt_perf_status;
@@ -53,6 +55,7 @@ struct i915_hwmon {
struct hwm_reg rg;
int scl_shift_power;
int scl_shift_energy;
+   int scl_shift_time;
 };
 
 static void
@@ -159,6 +162,119 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy)
mutex_unlock(>hwmon_lock);
 }
 
+static ssize_t
+hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
+char *buf)
+{
+   struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   intel_wakeref_t wakeref;
+   u32 r, x, y, x_w = 2; /* 2 bits */
+   u64 tau4, out;
+
+   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+   r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
+
+   x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
+   y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+   /*
+* tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+* = (4 | x) << (y - 2)
+* where (y - 2) ensures a 1.x fixed point representation of 1.x
+* However because y can be < 2, we compute
+* tau4 = (4 | x) << y
+* but add 2 when doing the final right shift to account for units
+*/
+   tau4 = ((1 << x_w) | x) << y;
+   /* val in hwmon interface units (millisec) */
+   out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+   return sysfs_emit(buf, "%llu\n", out);
+}
+
+static ssize_t
+hwm_power1_max_interval_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+   struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+   struct i915_hwmon *hwmon = ddat->hwmon;
+   u32 x, y, rxy, x_w = 2; /* 2 bits */
+   u64 tau4, r, max_win;
+   unsigned long val;
+   int ret;
+
+   ret = kstrtoul(buf, 0, );
+   if (ret)
+   return ret;
+
+   /*
+* Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12
+* The hwmon->scl_shift_time default of 0xa results in a max tau 

  1   2   >