[RFC PATCH] drm/msm: tune the devfreq to take into account the load history

2023-02-20 Thread Dmitry Baryshkov
Partially restore the handling of the GPU load history. Accumulate the
busy_time and total_time measured in active state during the polling
period. This results in a slightly smoother picture of the GPU frequencies
(measured on the a530/msm8996, using kmscube at different resolutions).

A call to msm_devfreq_get_dev_status() from msm_devfreq_active() was
removed in 69f06a5d854f ("drm/msm: remove explicit devfreq status
reset") because dev_pm_qos_update_request() triggered that internally.
Since commit fadcc3ab1302 ("drm/msm/gpu: Bypass PM QoS constraint for
idle clamp") removed the calls to dev_pm_qos_update_request(), that
removal is now reverted.

To keep the code simple for the RFC, the accumulated data is not
weighted by the frequency at which it was measured.

Cc: Chia-I Wu 
Signed-off-by: Dmitry Baryshkov 
---
 drivers/gpu/drm/msm/msm_gpu.h |  3 +++
 drivers/gpu/drm/msm/msm_gpu_devfreq.c | 30 ++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index fc1c0d8611a8..9d1783375dcc 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -156,6 +156,9 @@ struct msm_gpu_devfreq {
 
/** suspended: tracks if we're suspended */
bool suspended;
+
+   /* stats for the current period */
+   struct devfreq_dev_status status;
 };
 
 struct msm_gpu {
diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
index e27dbf12b5e8..92cb022c8628 100644
--- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
+++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
@@ -74,7 +74,7 @@ static unsigned long get_freq(struct msm_gpu *gpu)
return clk_get_rate(gpu->core_clk);
 }
 
-static int msm_devfreq_get_dev_status(struct device *dev,
+static int msm_devfreq_get_dev_status_int(struct device *dev,
struct devfreq_dev_status *status)
 {
struct msm_gpu *gpu = dev_to_gpu(dev);
@@ -112,6 +112,22 @@ static int msm_devfreq_get_dev_status(struct device *dev,
return 0;
 }
 
+static int msm_devfreq_get_dev_status(struct device *dev,
+   struct devfreq_dev_status *status)
+{
+   struct msm_gpu *gpu = dev_to_gpu(dev);
+	struct msm_gpu_devfreq *df = &gpu->devfreq;
+
+	msm_devfreq_get_dev_status_int(&gpu->pdev->dev, status);
+   status->busy_time += df->status.busy_time;
+   status->total_time += df->status.total_time;
+
+   df->status.busy_time = 0;
+   df->status.total_time = 0;
+
+   return 0;
+}
+
 static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
 {
*freq = get_freq(dev_to_gpu(dev));
@@ -290,6 +306,7 @@ void msm_devfreq_active(struct msm_gpu *gpu)
	struct msm_gpu_devfreq *df = &gpu->devfreq;
unsigned int idle_time;
unsigned long target_freq;
+   struct devfreq_dev_status status;
 
if (!has_devfreq(gpu))
return;
@@ -319,6 +336,12 @@ void msm_devfreq_active(struct msm_gpu *gpu)
if (target_freq)
		msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0);
 
+   /*
+* Reset the polling interval so we aren't inconsistent
+* about freq vs busy/total cycles
+*/
+	msm_devfreq_get_dev_status_int(&gpu->pdev->dev, &status);
+
	mutex_unlock(&df->devfreq->lock);
 
/*
@@ -339,6 +362,7 @@ static void msm_devfreq_idle_work(struct kthread_work *work)
struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq);
struct msm_drm_private *priv = gpu->dev->dev_private;
unsigned long idle_freq, target_freq = 0;
+   struct devfreq_dev_status status;
 
/*
 * Hold devfreq lock to synchronize with get_dev_status()/
@@ -346,6 +370,10 @@ static void msm_devfreq_idle_work(struct kthread_work *work)
 */
	mutex_lock(&df->devfreq->lock);
 
+	msm_devfreq_get_dev_status_int(&gpu->pdev->dev, &status);
+   df->status.busy_time += status.busy_time;
+   df->status.total_time += status.total_time;
+
idle_freq = get_freq(gpu);
 
if (priv->gpu_clamp_to_idle)
-- 
2.30.2
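
The RFC explicitly leaves frequency weighting out. For readers wondering
what that would involve, here is a minimal sketch (purely illustrative,
not part of the patch; msm_devfreq_accumulate() and the max_freq
reference are made-up names) that normalizes each sample's busy cycles
to the clock at which they were measured:

/* Hypothetical helper: scale busy time so that cycles spent at a low
 * clock count for less than cycles spent at max_freq. */
static void msm_devfreq_accumulate(struct msm_gpu_devfreq *df,
				   const struct devfreq_dev_status *sample,
				   unsigned long max_freq)
{
	df->status.busy_time += div64_u64((u64)sample->busy_time *
					  sample->current_frequency, max_freq);
	df->status.total_time += sample->total_time;
}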



[PATCH] drm/gma500: Clean up some inconsistent indenting

2023-02-20 Thread Jiapeng Chong
No functional modification involved.

drivers/gpu/drm/gma500/cdv_device.c:218 cdv_errata() warn: inconsistent indenting.

Reported-by: Abaci Robot 
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4126
Signed-off-by: Jiapeng Chong 
---
 drivers/gpu/drm/gma500/cdv_device.c | 27 ---
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index 3e83299113e3..765f359365b9 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -78,7 +78,8 @@ static u32 cdv_get_max_backlight(struct drm_device *dev)
if (max == 0) {
DRM_DEBUG_KMS("LVDS Panel PWM value is 0!\n");
/* i915 does this, I believe which means that we should not
-* smash PWM control as firmware will take control of it. */
+* smash PWM control as firmware will take control of it.
+*/
return 1;
}
 
@@ -149,6 +150,7 @@ static inline u32 CDV_MSG_READ32(int domain, uint port, uint offset)
int mcr = (0x10<<24) | (port << 16) | (offset << 8);
uint32_t ret_val = 0;
struct pci_dev *pci_root = pci_get_domain_bus_and_slot(domain, 0, 0);
+
pci_write_config_dword(pci_root, 0xD0, mcr);
	pci_read_config_dword(pci_root, 0xD4, &ret_val);
pci_dev_put(pci_root);
@@ -160,6 +162,7 @@ static inline void CDV_MSG_WRITE32(int domain, uint port, uint offset,
 {
int mcr = (0x11<<24) | (port << 16) | (offset << 8) | 0xF0;
struct pci_dev *pci_root = pci_get_domain_bus_and_slot(domain, 0, 0);
+
pci_write_config_dword(pci_root, 0xD4, value);
pci_write_config_dword(pci_root, 0xD0, mcr);
pci_dev_put(pci_root);
@@ -180,10 +183,8 @@ static void cdv_init_pm(struct drm_device *dev)
int domain = pci_domain_nr(pdev->bus);
int i;
 
-	dev_priv->apm_base = CDV_MSG_READ32(domain, PSB_PUNIT_PORT,
-			PSB_APMBA) & 0xFFFF;
-	dev_priv->ospm_base = CDV_MSG_READ32(domain, PSB_PUNIT_PORT,
-			PSB_OSPMBA) & 0xFFFF;
+	dev_priv->apm_base = CDV_MSG_READ32(domain, PSB_PUNIT_PORT, PSB_APMBA) & 0xFFFF;
+	dev_priv->ospm_base = CDV_MSG_READ32(domain, PSB_PUNIT_PORT, PSB_OSPMBA) & 0xFFFF;
 
/* Power status */
pwr_cnt = inl(dev_priv->apm_base + PSB_APM_CMD);
@@ -196,6 +197,7 @@ static void cdv_init_pm(struct drm_device *dev)
/* Wait for the GPU power */
for (i = 0; i < 5; i++) {
u32 pwr_sts = inl(dev_priv->apm_base + PSB_APM_STS);
+
if ((pwr_sts & PSB_PWRGT_GFX_MASK) == 0)
return;
udelay(10);
@@ -215,7 +217,7 @@ static void cdv_errata(struct drm_device *dev)
 *  Bonus Launch to work around the issue, by degrading
 *  performance.
 */
-CDV_MSG_WRITE32(pci_domain_nr(pdev->bus), 3, 0x30, 0x08027108);
+   CDV_MSG_WRITE32(pci_domain_nr(pdev->bus), 3, 0x30, 0x08027108);
 }
 
 /**
@@ -401,20 +403,21 @@ static int cdv_power_up(struct drm_device *dev)
 
 static void cdv_hotplug_work_func(struct work_struct *work)
 {
-    struct drm_psb_private *dev_priv = container_of(work, struct drm_psb_private,
+	struct drm_psb_private *dev_priv = container_of(work, struct drm_psb_private,
 							hotplug_work);
	struct drm_device *dev = &dev_priv->dev;
 
-/* Just fire off a uevent and let userspace tell us what to do */
-drm_helper_hpd_irq_event(dev);
+   /* Just fire off a uevent and let userspace tell us what to do */
+   drm_helper_hpd_irq_event(dev);
 }
 
 /* The core driver has received a hotplug IRQ. We are in IRQ context
-   so extract the needed information and kick off queued processing */
-
+ * so extract the needed information and kick off queued processing
+ */
 static int cdv_hotplug_event(struct drm_device *dev)
 {
struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
+
	schedule_work(&dev_priv->hotplug_work);
REG_WRITE(PORT_HOTPLUG_STAT, REG_READ(PORT_HOTPLUG_STAT));
return 1;
@@ -424,6 +427,7 @@ static void cdv_hotplug_enable(struct drm_device *dev, bool on)
 {
if (on) {
u32 hotplug = REG_READ(PORT_HOTPLUG_EN);
+
hotplug |= HDMIB_HOTPLUG_INT_EN | HDMIC_HOTPLUG_INT_EN |
   HDMID_HOTPLUG_INT_EN | CRT_HOTPLUG_INT_EN;
REG_WRITE(PORT_HOTPLUG_EN, hotplug);
@@ -549,6 +553,7 @@ static const struct psb_offset cdv_regmap[2] = {
 static int cdv_chip_setup(struct drm_device *dev)
 {
struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
+
	INIT_WORK(&dev_priv->hotplug_work, cdv_hotplug_work_func);
 
dev_priv->use_msi = true;
-- 
2.20.1.7.g153144c



Re: [PATCH 1/2] dt-bindings: display/panel: Add Lenovo NT36523W BOE panel

2023-02-20 Thread Konrad Dybcio



On 21.02.2023 03:08, Rob Herring wrote:
> On Fri, Feb 17, 2023 at 12:29:07PM +0100, Konrad Dybcio wrote:
>> Add bindings for the 2000x1200px IPS panel found on Lenovo Tab P11/
>> XiaoXin Pad devices.
>>
>> Signed-off-by: Konrad Dybcio 
>> ---
>>  .../display/panel/lenovo,nt36523w-boe-j606.yaml| 60 
>> ++
>>  1 file changed, 60 insertions(+)
>>
>> diff --git 
>> a/Documentation/devicetree/bindings/display/panel/lenovo,nt36523w-boe-j606.yaml
>>  
>> b/Documentation/devicetree/bindings/display/panel/lenovo,nt36523w-boe-j606.yaml
>> new file mode 100644
>> index ..43dcbe3f9f30
>> --- /dev/null
>> +++ 
>> b/Documentation/devicetree/bindings/display/panel/lenovo,nt36523w-boe-j606.yaml
>> @@ -0,0 +1,60 @@
>> +# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
>> +%YAML 1.2
>> +---
>> +$id: 
>> http://devicetree.org/schemas/display/panel/lenovo,nt36523w-boe-j606.yaml#
>> +$schema: http://devicetree.org/meta-schemas/core.yaml#
>> +
>> +title: NT36523W BOE panel found on Lenovo J606 devices
>> +
>> +maintainers:
>> +  - Konrad Dybcio 
>> +
>> +allOf:
>> +  - $ref: panel-common.yaml#
>> +
>> +properties:
>> +  compatible:
>> +const: lenovo,nt36523w-boe-j606
>> +
>> +  reg:
>> +maxItems: 1
>> +description: DSI virtual channel
>> +
>> +  vddio-supply: true
> 
> If only one supply, why not use panel-simple-dsi.yaml? Though probably 
> there's more than just an IO supply?
Display regulators (AB/IBB/AMOLEDB, not to be confused with LAB/IBB)
on modern Qualcomm platforms are controlled by the secure firmware
layer (yes, you read that correctly). Some panels require additional
supplies, but this one just has one positive, one negative and one
I/O regulator.

Konrad
> 
> Rob


Re: [PATCH 1/2] dt-bindings: display/panel: Add Lenovo NT36523W BOE panel

2023-02-20 Thread Rob Herring
On Fri, Feb 17, 2023 at 12:29:07PM +0100, Konrad Dybcio wrote:
> Add bindings for the 2000x1200px IPS panel found on Lenovo Tab P11/
> XiaoXin Pad devices.
> 
> Signed-off-by: Konrad Dybcio 
> ---
>  .../display/panel/lenovo,nt36523w-boe-j606.yaml| 60 
> ++
>  1 file changed, 60 insertions(+)
> 
> diff --git 
> a/Documentation/devicetree/bindings/display/panel/lenovo,nt36523w-boe-j606.yaml
>  
> b/Documentation/devicetree/bindings/display/panel/lenovo,nt36523w-boe-j606.yaml
> new file mode 100644
> index ..43dcbe3f9f30
> --- /dev/null
> +++ 
> b/Documentation/devicetree/bindings/display/panel/lenovo,nt36523w-boe-j606.yaml
> @@ -0,0 +1,60 @@
> +# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
> +%YAML 1.2
> +---
> +$id: 
> http://devicetree.org/schemas/display/panel/lenovo,nt36523w-boe-j606.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: NT36523W BOE panel found on Lenovo J606 devices
> +
> +maintainers:
> +  - Konrad Dybcio 
> +
> +allOf:
> +  - $ref: panel-common.yaml#
> +
> +properties:
> +  compatible:
> +const: lenovo,nt36523w-boe-j606
> +
> +  reg:
> +maxItems: 1
> +description: DSI virtual channel
> +
> +  vddio-supply: true

If only one supply, why not use panel-simple-dsi.yaml? Though probably 
there's more than just an IO supply?

Rob


Re: linux-6.2-rc4+ hangs on poweroff/reboot: Bisected

2023-02-20 Thread Ben Skeggs
On Mon, 20 Feb 2023 at 21:27, Karol Herbst  wrote:
>
> On Mon, Feb 20, 2023 at 11:51 AM Chris Clayton  
> wrote:
> >
> >
> >
> > On 20/02/2023 05:35, Ben Skeggs wrote:
> > > On Sun, 19 Feb 2023 at 04:55, Chris Clayton  
> > > wrote:
> > >>
> > >>
> > >>
> > >> On 18/02/2023 15:19, Chris Clayton wrote:
> > >>>
> > >>>
> > >>> On 18/02/2023 12:25, Karol Herbst wrote:
> >  On Sat, Feb 18, 2023 at 1:22 PM Chris Clayton 
> >   wrote:
> > >
> > >
> > >
> > > On 15/02/2023 11:09, Karol Herbst wrote:
> > >> On Wed, Feb 15, 2023 at 11:36 AM Linux regression tracking #update
> > >> (Thorsten Leemhuis)  wrote:
> > >>>
> > >>> On 13.02.23 10:14, Chris Clayton wrote:
> >  On 13/02/2023 02:57, Dave Airlie wrote:
> > > On Sun, 12 Feb 2023 at 00:43, Chris Clayton 
> > >  wrote:
> > >>
> > >>
> > >>
> > >> On 10/02/2023 19:33, Linux regression tracking (Thorsten 
> > >> Leemhuis) wrote:
> > >>> On 10.02.23 20:01, Karol Herbst wrote:
> >  On Fri, Feb 10, 2023 at 7:35 PM Linux regression tracking 
> >  (Thorsten
> >  Leemhuis)  wrote:
> > >
> > > On 08.02.23 09:48, Chris Clayton wrote:
> > >>
> > >> I'm assuming  that we are not going to see a fix for this 
> > >> regression before 6.2 is released.
> > >
> > > Yeah, looks like it. That's unfortunate, but happens. But 
> > > there is still
> > > time to fix it and there is one thing I wonder:
> > >
> > > Did any of the nouveau developers look at the netconsole 
> > > captures Chris
> > > posted more than a week ago to check if they somehow help to 
> > > track down
> > > the root of this problem?
> > 
> >  I did now and I can't spot anything. I think at this point it 
> >  would
> >  make sense to dump the active tasks/threads via sysrq keys to 
> >  see if
> >  any is in a weird state preventing the machine from shutting 
> >  down.
> > >>>
> > >>> Many thx for looking into it!
> > >>
> > >> Yes, thanks Karol.
> > >>
> > >> Attached is the output from dmesg when this block of code:
> > >>
> > >> /bin/mount /dev/sda7 /mnt/sda7
> > >> /bin/mountpoint /proc || /bin/mount /proc
> > >> /bin/dmesg -w > /mnt/sda7/sysrq.dmesg.log &
> > >> /bin/echo t > /proc/sysrq-trigger
> > >> /bin/sleep 1
> > >> /bin/sync
> > >> /bin/sleep 1
> > >> kill $(pidof dmesg)
> > >> /bin/umount /mnt/sda7
> > >>
> > >> is executed immediately before /sbin/reboot is called as the 
> > >> final step of rebooting my system.
> > >>
> > >> I hope this is what you were looking for, but if not, please let 
> > >> me know what you need
> > 
> >  Thanks Dave. [...]
> > >>> FWIW, in case anyone strands here in the archives: the msg was
> > >>> truncated. The full post can be found in a new thread:
> > >>>
> > >>> https://lore.kernel.org/lkml/e0b80506-b3cf-315b-4327-1b988d860...@googlemail.com/
> > >>>
> > >>> Sadly it seems the info "With runpm=0, both reboot and poweroff 
> > >>> work on
> > >>> my laptop." didn't bring us much further to a solution. :-/ I don't
> > >>> really like it, but for regression tracking I'm now putting this on 
> > >>> the
> > >>> back-burner, as a fix is not in sight.
> > >>>
> > >>> #regzbot monitor:
> > >>> https://lore.kernel.org/lkml/e0b80506-b3cf-315b-4327-1b988d860...@googlemail.com/
> > >>> #regzbot backburner: hard to debug and apparently rare
> > >>> #regzbot ignore-activity
> > >>>
> > >>
> > >> yeah.. this bug looks a little annoying. Sadly the only Turing based
> > >> laptop I got doesn't work on Nouveau because of firmware related
> > >> issues and we probably need to get updated ones from Nvidia here :(
> > >>
> > >> But it's a bit weird that the kernel doesn't shutdown, because I 
> > >> don't
> > >> see anything in the logs which would prevent that from happening.
> > >> Unless it's waiting on one of the tasks to complete, but none of them
> > >> looked in any way nouveau related.
> > >>
> > >> If somebody else has any fancy kernel debugging tips here to figure
> > >> out why it hangs, that would be very helpful...
> > >>
> > >
> > > I think I've figured this out. It's to do with how my system is 
> > > configured. I do have an initrd, but the only thing on
> > > it is the cpu microcode which, it is recommended, should be loaded 
> > > early. The absence of the 

[PATCH v3 3/3] drm/bridge: tfp410: If connected, use I2C for polled HPD status.

2023-02-20 Thread Jonathan Cormier
From: Michael Williamson 

If the I2C bus is connected on the TFP410, then use the register
status bit to determine the connection state.  This is needed, in
particular, for polling the state when the Hot Plug Detect pin is not
connected to a controlling CPU via a GPIO/IRQ line.

Signed-off-by: Michael Williamson 
Signed-off-by: Jonathan Cormier 
---
 drivers/gpu/drm/bridge/ti-tfp410.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c 
b/drivers/gpu/drm/bridge/ti-tfp410.c
index 41007d05d584..eeb7202452aa 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -28,6 +28,9 @@
 #define TFP410_BIT_BSEL BIT(2)
 #define TFP410_BIT_DSEL BIT(3)
 
+#define TFP410_REG_CTL_2_MODE  0x09
+#define TFP410_BIT_HTPLG BIT(1)
+
 static const struct regmap_config tfp410_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
@@ -105,6 +108,15 @@ static enum drm_connector_status
 tfp410_connector_detect(struct drm_connector *connector, bool force)
 {
struct tfp410 *dvi = drm_connector_to_tfp410(connector);
+   int ret;
+
+   if (dvi->i2c) {
+		ret = regmap_test_bits(dvi->regmap, TFP410_REG_CTL_2_MODE, TFP410_BIT_HTPLG);
+   if (ret < 0)
+			dev_err(dvi->dev, "%s failed to read HTPLG bit : %d\n", __func__, ret);
+   else
+			return ret ? connector_status_connected : connector_status_disconnected;
+   }
 
return drm_bridge_detect(dvi->next_bridge);
 }

-- 
2.25.1



[PATCH v3 2/3] drm/bridge: tfp410: Fix logic to configured polled HPD

2023-02-20 Thread Jonathan Cormier
From: Michael Williamson 

The logic to configure polling (vs async/irq notification) of hot-plug
events was not correct.  If the connected bridge requires polling,
then inform the upstream bridge we also require polling.

Signed-off-by: Michael Williamson 
Signed-off-by: Jonathan Cormier 
---
 drivers/gpu/drm/bridge/ti-tfp410.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c 
b/drivers/gpu/drm/bridge/ti-tfp410.c
index bb3f8d0ff207..41007d05d584 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -155,7 +155,7 @@ static int tfp410_attach(struct drm_bridge *bridge,
return -ENODEV;
}
 
-   if (dvi->next_bridge->ops & DRM_BRIDGE_OP_DETECT)
+   if (dvi->next_bridge->ops & DRM_BRIDGE_OP_HPD)
dvi->connector.polled = DRM_CONNECTOR_POLL_HPD;
else
dvi->connector.polled = DRM_CONNECTOR_POLL_CONNECT | 
DRM_CONNECTOR_POLL_DISCONNECT;

-- 
2.25.1



[PATCH v3 1/3] drm/bridge: tfp410: Support basic I2C interface

2023-02-20 Thread Jonathan Cormier
From: Michael Williamson 

The TFP410 driver does not support I2C.  As such, the device remains in
Power Down if the I2C is enabled by the bootstrap pins.

Add basic support for the I2C interface, and provide support to take
the device out of power down when enabled.  Also read the bootstrap mode
pins via the CTL_1_MODE register when using the I2C bus.

Signed-off-by: Michael Williamson 
Signed-off-by: Jonathan Cormier 
---
 drivers/gpu/drm/bridge/ti-tfp410.c | 93 +++---
 1 file changed, 67 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c 
b/drivers/gpu/drm/bridge/ti-tfp410.c
index b9635abbad16..bb3f8d0ff207 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -6,6 +6,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -21,6 +22,20 @@
 
 #define HOTPLUG_DEBOUNCE_MS1100
 
+#define TFP410_REG_CTL_1_MODE  0x08
+#define TFP410_BIT_PD   BIT(0)
+#define TFP410_BIT_EDGE BIT(1)
+#define TFP410_BIT_BSEL BIT(2)
+#define TFP410_BIT_DSEL BIT(3)
+
+static const struct regmap_config tfp410_regmap_config = {
+   .reg_bits = 8,
+   .val_bits = 8,
+
+   .max_register = 0xff,
+   .cache_type = REGCACHE_NONE,
+};
+
 struct tfp410 {
struct drm_bridge   bridge;
struct drm_connectorconnector;
@@ -33,6 +48,8 @@ struct tfp410 {
struct drm_bridge   *next_bridge;
 
struct device *dev;
+   struct i2c_client *i2c;
+   struct regmap *regmap;
 };
 
 static inline struct tfp410 *
@@ -183,6 +200,9 @@ static void tfp410_enable(struct drm_bridge *bridge)
 {
struct tfp410 *dvi = drm_bridge_to_tfp410(bridge);
 
+   if (dvi->i2c)
+   regmap_set_bits(dvi->regmap, TFP410_REG_CTL_1_MODE, 
TFP410_BIT_PD);
+
gpiod_set_value_cansleep(dvi->powerdown, 0);
 }
 
@@ -190,6 +210,9 @@ static void tfp410_disable(struct drm_bridge *bridge)
 {
struct tfp410 *dvi = drm_bridge_to_tfp410(bridge);
 
+   if (dvi->i2c)
+   regmap_clear_bits(dvi->regmap, TFP410_REG_CTL_1_MODE, 
TFP410_BIT_PD);
+
gpiod_set_value_cansleep(dvi->powerdown, 1);
 }
 
@@ -221,38 +244,48 @@ static const struct drm_bridge_timings 
tfp410_default_timings = {
.hold_time_ps = 1300,
 };
 
-static int tfp410_parse_timings(struct tfp410 *dvi, bool i2c)
+static int tfp410_parse_timings(struct tfp410 *dvi)
 {
struct drm_bridge_timings *timings = >timings;
struct device_node *ep;
u32 pclk_sample = 0;
u32 bus_width = 24;
u32 deskew = 0;
+   unsigned int val = 0;
+   int ret = 0;
 
/* Start with defaults. */
*timings = tfp410_default_timings;
 
-   if (i2c)
+   if (dvi->i2c) {
/*
-* In I2C mode timings are configured through the I2C interface.
-* As the driver doesn't support I2C configuration yet, we just
-* go with the defaults (BSEL=1, DSEL=1, DKEN=0, EDGE=1).
+* For now, assume settings are latched from pins on reset / 
power up.
+* Should add options to optionally set them out of DT 
properties.
 */
-   return 0;
-
-   /*
-* In non-I2C mode, timings are configured through the BSEL, DSEL, DKEN
-* and EDGE pins. They are specified in DT through endpoint properties
-* and vendor-specific properties.
-*/
-   ep = of_graph_get_endpoint_by_regs(dvi->dev->of_node, 0, 0);
-   if (!ep)
-   return -EINVAL;
-
-   /* Get the sampling edge from the endpoint. */
-	of_property_read_u32(ep, "pclk-sample", &pclk_sample);
-	of_property_read_u32(ep, "bus-width", &bus_width);
-   of_node_put(ep);
+		ret = regmap_read(dvi->regmap, TFP410_REG_CTL_1_MODE, &val);
+   if (ret) {
+   dev_err(dvi->dev, "Read failed on CTL_1_MODE\n");
+   return ret;
+   }
+   pclk_sample = (val & TFP410_BIT_EDGE) ? 1 : 0;
+   bus_width = (val & TFP410_BIT_BSEL) ? 24 : 12;
+		dev_dbg(dvi->dev, "(0x%02X) : detected %d bus width, %s edge sampling\n",
+   val, bus_width, pclk_sample ? "positive" : "negative");
+   } else {
+   /*
+* In non-I2C mode, timings are configured through the BSEL, 
DSEL, DKEN
+* and EDGE pins. They are specified in DT through endpoint 
properties
+* and vendor-specific properties.
+*/
+   ep = of_graph_get_endpoint_by_regs(dvi->dev->of_node, 0, 0);
+   if (!ep)
+   return -EINVAL;
+
+   /* Get the sampling edge from the endpoint. */
+		of_property_read_u32(ep, "pclk-sample", &pclk_sample);
+		of_property_read_u32(ep, "bus-width", &bus_width);
+   of_node_put(ep);
+   }
 

[PATCH v3 0/3] drm/bridge: tfp410: Add i2c support

2023-02-20 Thread Jonathan Cormier
The TFP410 driver does not support I2C.  As such, the device remains in
Power Down if the I2C is enabled by the bootstrap pins.

Add basic support for the I2C interface, and provide support to take
the device out of power down when enabled.  Also read the bootstrap mode
pins via the CTL_1_MODE register when using the I2C bus.

Also allow polling device to support hdmi/dvi hotplug detection.

Signed-off-by: Jonathan Cormier 
---
Changes in v3:
- Drop dt-bindings i2c example
- Link to v2: 
https://lore.kernel.org/r/20230125-tfp410_i2c-v2-0-bf22f4dcb...@criticallink.com

Changes in v2:
- Fix dt_binding_check errors
- Remove hdmi connector from binding example
- Fix compile warning. Unused variable and unsigned int instead of int for ret
- Fix commit titles
- Drop of_match_ptr change
- Link to v1: 
https://lore.kernel.org/r/20230125-tfp410_i2c-v1-0-66a4d4e39...@criticallink.com

---
Michael Williamson (3):
  drm/bridge: tfp410: Support basic I2C interface
  drm/bridge: tfp410: Fix logic to configured polled HPD
  drm/bridge: tfp410: If connected, use I2C for polled HPD status.

 drivers/gpu/drm/bridge/ti-tfp410.c | 107 +++--
 1 file changed, 80 insertions(+), 27 deletions(-)
---
base-commit: 93f875a8526a291005e7f38478079526c843cbec
change-id: 20230125-tfp410_i2c-3b270b0bf3e0

Best regards,
-- 
Jonathan Cormier 



Re: [PATCH drm-next v2 04/16] maple_tree: add flag MT_FLAGS_LOCK_NONE

2023-02-20 Thread Matthew Wilcox
On Mon, Feb 20, 2023 at 06:06:03PM +0100, Danilo Krummrich wrote:
> On 2/20/23 16:10, Matthew Wilcox wrote:
> > This is why we like people to use the spinlock embedded in the tree.
> > There's nothing for the user to care about.  If the access really is
> > serialised, acquiring/releasing the uncontended spinlock is a minimal
> > cost compared to all the other things that will happen while modifying
> > the tree.
> 
> I think as for the users of the GPUVA manager we'd have two cases:
> 
> 1) Accesses to the manager (and hence the tree) are serialized, no lock
> needed.
> 
> 2) Multiple operations on the tree must be locked in order to make them
> appear atomic.

Could you give an example here of what you'd like to do?  Ideally
something complicated so I don't say "Oh, you can just do this" when
there's a more complex example for which "this" won't work.  I'm sure
that's embedded somewhere in the next 20-odd patches, but it's probably
quicker for you to describe in terms of tree operations that have to
appear atomic than for me to try to figure it out.

> In either case the embedded spinlock wouldn't be useful, we'd either need an
> external lock or no lock at all.
> 
> If there are any internal reasons why specific tree operations must be
> mutually excluded (such as those you explain below), wouldn't it make more
> sense to always have the internal lock and, optionally, allow users to
> specify an external lock additionally?

So the way this works for the XArray, which is a little older than the
Maple tree, is that we always use the internal spinlock for
modifications (possibly BH or IRQ safe), and if someone wants to
use an external mutex to make some callers atomic with respect to each
other, they're free to do so.  In that case, the XArray doesn't check
the user's external locking at all, because it really can't know.

I'd advise taking that approach; if there's really no way to use the
internal spinlock to make your complicated updates appear atomic
then just let the maple tree use its internal spinlock, and you can
also use your external mutex however you like.
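
To make that concrete, here is a minimal sketch of the pattern described
above (illustrative only; the gpuva_mgr structure and gpuva_remap() are
made up, not the actual GPUVA manager code):

struct gpuva_mgr {
	struct maple_tree mt;	/* modifications use its internal spinlock */
	struct mutex lock;	/* groups multi-op updates */
};

static int gpuva_remap(struct gpuva_mgr *mgr,
		       unsigned long old_first, unsigned long old_last,
		       unsigned long new_first, unsigned long new_last,
		       void *entry)
{
	int err;

	/* The external mutex makes the erase + store pair appear atomic
	 * to other gpuva_remap() callers; each mtree_store_range() call
	 * still takes the tree's internal spinlock on its own. */
	mutex_lock(&mgr->lock);
	err = mtree_store_range(&mgr->mt, old_first, old_last, NULL,
				GFP_KERNEL);
	if (!err)
		err = mtree_store_range(&mgr->mt, new_first, new_last, entry,
					GFP_KERNEL);
	mutex_unlock(&mgr->lock);
	return err;
}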


[PATCH v5 14/14] drm/i915: Add deadline based boost support

2023-02-20 Thread Rob Clark
From: Rob Clark 

v2: rebase

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/i915/i915_request.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7503dcb9043b..44491e7e214c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -97,6 +97,25 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
return i915_request_enable_breadcrumb(to_request(fence));
 }
 
+static void i915_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+   struct i915_request *rq = to_request(fence);
+
+   if (i915_request_completed(rq))
+   return;
+
+   if (i915_request_started(rq))
+   return;
+
+   /*
+* TODO something more clever for deadlines that are in the
+* future.  I think probably track the nearest deadline in
+* rq->timeline and set timer to trigger boost accordingly?
+*/
+
+   intel_rps_boost(rq);
+}
+
 static signed long i915_fence_wait(struct dma_fence *fence,
   bool interruptible,
   signed long timeout)
@@ -182,6 +201,7 @@ const struct dma_fence_ops i915_fence_ops = {
.signaled = i915_fence_signaled,
.wait = i915_fence_wait,
.release = i915_fence_release,
+   .set_deadline = i915_fence_set_deadline,
 };
 
 static void irq_execute_cb(struct irq_work *wrk)
-- 
2.39.1



[PATCH v5 10/14] drm/vblank: Add helper to get next vblank time

2023-02-20 Thread Rob Clark
From: Rob Clark 

Will be used in the next commit to set a deadline on fences that an
atomic update is waiting on.

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/drm_vblank.c | 32 
 include/drm/drm_vblank.h |  1 +
 2 files changed, 33 insertions(+)

diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index 2ff31717a3de..caf25ebb34c5 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -980,6 +980,38 @@ u64 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc,
 }
 EXPORT_SYMBOL(drm_crtc_vblank_count_and_time);
 
+/**
+ * drm_crtc_next_vblank_time - calculate the time of the next vblank
+ * @crtc: the crtc for which to calculate next vblank time
+ * @vblanktime: pointer to time to receive the next vblank timestamp.
+ *
+ * Calculate the expected time of the next vblank based on time of previous
+ * vblank and frame duration
+ */
+int drm_crtc_next_vblank_time(struct drm_crtc *crtc, ktime_t *vblanktime)
+{
+   unsigned int pipe = drm_crtc_index(crtc);
+	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[pipe];
+   u64 count;
+
+   if (!vblank->framedur_ns)
+   return -EINVAL;
+
+   count = drm_vblank_count_and_time(crtc->dev, pipe, vblanktime);
+
+   /*
+* If we don't get a valid count, then we probably also don't
+* have a valid time:
+*/
+   if (!count)
+   return -EINVAL;
+
+   *vblanktime = ktime_add(*vblanktime, ns_to_ktime(vblank->framedur_ns));
+
+   return 0;
+}
+EXPORT_SYMBOL(drm_crtc_next_vblank_time);
+
 static void send_vblank_event(struct drm_device *dev,
struct drm_pending_vblank_event *e,
u64 seq, ktime_t now)
diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h
index 733a3e2d1d10..a63bc2c92f3c 100644
--- a/include/drm/drm_vblank.h
+++ b/include/drm/drm_vblank.h
@@ -230,6 +230,7 @@ bool drm_dev_has_vblank(const struct drm_device *dev);
 u64 drm_crtc_vblank_count(struct drm_crtc *crtc);
 u64 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc,
   ktime_t *vblanktime);
+int drm_crtc_next_vblank_time(struct drm_crtc *crtc, ktime_t *vblanktime);
 void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
   struct drm_pending_vblank_event *e);
 void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
-- 
2.39.1



[PATCH v5 12/14] drm/msm: Add deadline based boost support

2023-02-20 Thread Rob Clark
From: Rob Clark 

Track the nearest deadline on a fence timeline and set a timer to expire
shortly before to trigger boost if the fence has not yet been signaled.

v2: rebase

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/msm/msm_fence.c | 74 +
 drivers/gpu/drm/msm/msm_fence.h | 20 +
 2 files changed, 94 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index 56641408ea74..51b461f32103 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -8,6 +8,35 @@
 
 #include "msm_drv.h"
 #include "msm_fence.h"
+#include "msm_gpu.h"
+
+static struct msm_gpu *fctx2gpu(struct msm_fence_context *fctx)
+{
+   struct msm_drm_private *priv = fctx->dev->dev_private;
+   return priv->gpu;
+}
+
+static enum hrtimer_restart deadline_timer(struct hrtimer *t)
+{
+   struct msm_fence_context *fctx = container_of(t,
+   struct msm_fence_context, deadline_timer);
+
+	kthread_queue_work(fctx2gpu(fctx)->worker, &fctx->deadline_work);
+
+   return HRTIMER_NORESTART;
+}
+
+static void deadline_work(struct kthread_work *work)
+{
+   struct msm_fence_context *fctx = container_of(work,
+   struct msm_fence_context, deadline_work);
+
+   /* If deadline fence has already passed, nothing to do: */
+   if (msm_fence_completed(fctx, fctx->next_deadline_fence))
+   return;
+
+   msm_devfreq_boost(fctx2gpu(fctx), 2);
+}
 
 
 struct msm_fence_context *
@@ -36,6 +65,13 @@ msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr,
fctx->completed_fence = fctx->last_fence;
*fctx->fenceptr = fctx->last_fence;
 
+	hrtimer_init(&fctx->deadline_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+   fctx->deadline_timer.function = deadline_timer;
+
+	kthread_init_work(&fctx->deadline_work, deadline_work);
+
+   fctx->next_deadline = ktime_get();
+
return fctx;
 }
 
@@ -62,6 +98,8 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence)
	spin_lock_irqsave(&fctx->spinlock, flags);
if (fence_after(fence, fctx->completed_fence))
fctx->completed_fence = fence;
+   if (msm_fence_completed(fctx, fctx->next_deadline_fence))
+		hrtimer_cancel(&fctx->deadline_timer);
	spin_unlock_irqrestore(&fctx->spinlock, flags);
 }
 
@@ -92,10 +130,46 @@ static bool msm_fence_signaled(struct dma_fence *fence)
return msm_fence_completed(f->fctx, f->base.seqno);
 }
 
+static void msm_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+   struct msm_fence *f = to_msm_fence(fence);
+   struct msm_fence_context *fctx = f->fctx;
+   unsigned long flags;
+   ktime_t now;
+
+	spin_lock_irqsave(&fctx->spinlock, flags);
+   now = ktime_get();
+
+   if (ktime_after(now, fctx->next_deadline) ||
+   ktime_before(deadline, fctx->next_deadline)) {
+   fctx->next_deadline = deadline;
+   fctx->next_deadline_fence =
+   max(fctx->next_deadline_fence, (uint32_t)fence->seqno);
+
+   /*
+* Set timer to trigger boost 3ms before deadline, or
+* if we are already less than 3ms before the deadline
+* schedule boost work immediately.
+*/
+   deadline = ktime_sub(deadline, ms_to_ktime(3));
+
+   if (ktime_after(now, deadline)) {
+			kthread_queue_work(fctx2gpu(fctx)->worker,
+					   &fctx->deadline_work);
+   } else {
+			hrtimer_start(&fctx->deadline_timer, deadline,
+   HRTIMER_MODE_ABS);
+   }
+   }
+
+	spin_unlock_irqrestore(&fctx->spinlock, flags);
+}
+
 static const struct dma_fence_ops msm_fence_ops = {
.get_driver_name = msm_fence_get_driver_name,
.get_timeline_name = msm_fence_get_timeline_name,
.signaled = msm_fence_signaled,
+   .set_deadline = msm_fence_set_deadline,
 };
 
 struct dma_fence *
diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h
index 7f1798c54cd1..cdaebfb94f5c 100644
--- a/drivers/gpu/drm/msm/msm_fence.h
+++ b/drivers/gpu/drm/msm/msm_fence.h
@@ -52,6 +52,26 @@ struct msm_fence_context {
volatile uint32_t *fenceptr;
 
spinlock_t spinlock;
+
+   /*
+* TODO this doesn't really deal with multiple deadlines, like
+* if userspace got multiple frames ahead.. OTOH atomic updates
+* don't queue, so maybe that is ok
+*/
+
+   /** next_deadline: Time of next deadline */
+   ktime_t next_deadline;
+
+   /**
+* next_deadline_fence:
+*
+* Fence value for next pending deadline.  The deadline timer is
+* canceled when this fence is signaled.
+*/
+   uint32_t next_deadline_fence;
+
+   struct hrtimer 

[PATCH v5 09/14] drm/syncobj: Add deadline support for syncobj waits

2023-02-20 Thread Rob Clark
From: Rob Clark 

Add a new flag to let userspace provide a deadline as a hint for syncobj
and timeline waits.  This gives a hint to the driver signaling the
backing fences about how soon userspace needs it to complete work, so it
can adjust GPU frequency accordingly.  An immediate deadline can be
given to provide something equivalent to i915 "wait boost".

v2: Use absolute u64 ns value for deadline hint, drop cap and driver
feature flag in favor of allowing count_handles==0 as a way for
userspace to probe kernel for support of new flag
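
For illustration, the probe could look like this from userspace (a
sketch using libdrm's drmIoctl(); the flag and ioctl names come from
the patch, the helper itself is hypothetical):

#include <stdbool.h>
#include <xf86drm.h>
#include <drm/drm.h>

/* Before this patch count_handles == 0 was rejected with -EINVAL, so a
 * zero return here doubles as a probe for deadline support. */
static bool syncobj_deadline_supported(int drm_fd)
{
	struct drm_syncobj_wait wait = {
		.count_handles = 0,
		.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE,
	};

	return drmIoctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait) == 0;
}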

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/drm_syncobj.c | 59 +++
 include/uapi/drm/drm.h|  5 +++
 2 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 0c2be8360525..4f9c3b3906f1 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -973,7 +973,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
  uint32_t count,
  uint32_t flags,
  signed long timeout,
- uint32_t *idx)
+ uint32_t *idx,
+ ktime_t *deadline)
 {
struct syncobj_wait_entry *entries;
struct dma_fence *fence;
@@ -1053,6 +1054,15 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
		drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
}
 
+   if (deadline) {
+   for (i = 0; i < count; ++i) {
+   fence = entries[i].fence;
+   if (!fence)
+   continue;
+   dma_fence_set_deadline(fence, *deadline);
+   }
+   }
+
do {
set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1151,7 +1161,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev,
  struct drm_file *file_private,
  struct drm_syncobj_wait *wait,
			  struct drm_syncobj_timeline_wait *timeline_wait,
- struct drm_syncobj **syncobjs, bool timeline)
+ struct drm_syncobj **syncobjs, bool timeline,
+ ktime_t *deadline)
 {
signed long timeout = 0;
uint32_t first = ~0;
@@ -1162,7 +1173,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev,
 						 NULL,
 						 wait->count_handles,
 						 wait->flags,
-						 timeout, &first);
+						 timeout, &first,
+						 deadline);
if (timeout < 0)
return timeout;
wait->first_signaled = first;
@@ -1172,7 +1184,8 @@ static int drm_syncobj_array_wait(struct drm_device *dev,
 						 u64_to_user_ptr(timeline_wait->points),
 						 timeline_wait->count_handles,
 						 timeline_wait->flags,
-						 timeout, &first);
+						 timeout, &first,
+						 deadline);
if (timeout < 0)
return timeout;
timeline_wait->first_signaled = first;
@@ -1243,17 +1256,22 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 {
struct drm_syncobj_wait *args = data;
struct drm_syncobj **syncobjs;
+   unsigned possible_flags;
+   ktime_t t, *tp = NULL;
int ret = 0;
 
if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
return -EOPNOTSUPP;
 
-   if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
-   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+   possible_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE;
+
+   if (args->flags & ~possible_flags)
return -EINVAL;
 
if (args->count_handles == 0)
-   return -EINVAL;
+   return 0;
 
ret = drm_syncobj_array_find(file_private,
 u64_to_user_ptr(args->handles),
@@ -1262,8 +1280,13 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
if 

[PATCH v5 06/14] dma-buf/sync_file: Support (E)POLLPRI

2023-02-20 Thread Rob Clark
From: Rob Clark 

Allow userspace to use the EPOLLPRI/POLLPRI flag to indicate an urgent
wait (as opposed to a "housekeeping" wait to know when to cleanup after
some work has completed).  Usermode components of GPU driver stacks
often poll() on fence fd's to know when it is safe to do things like
free or reuse a buffer, but they can also poll() on a fence fd when
waiting to read back results from the GPU.  The EPOLLPRI/POLLPRI flag
lets the kernel differentiate these two cases.

Signed-off-by: Rob Clark 
---
 drivers/dma-buf/sync_file.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 418021cfb87c..cbe96295373b 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -192,6 +192,14 @@ static __poll_t sync_file_poll(struct file *file, poll_table *wait)
 {
struct sync_file *sync_file = file->private_data;
 
+   /*
+* The POLLPRI/EPOLLPRI flag can be used to signal that
+* userspace wants the fence to signal ASAP, express this
+* as an immediate deadline.
+*/
+   if (poll_requested_events(wait) & EPOLLPRI)
+   dma_fence_set_deadline(sync_file->fence, ktime_get());
+
	poll_wait(file, &sync_file->wq, wait);
 
	if (list_empty(&sync_file->cb.node) &&
-- 
2.39.1
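
For illustration, an urgent userspace wait on a fence fd could look
like this (a hypothetical sketch; wait_fence_urgent() is not part of
the patch):

#include <poll.h>

/* POLLPRI marks this as an urgent wait, which the patch above turns
 * into an immediate deadline on the backing fence. */
static int wait_fence_urgent(int fence_fd, int timeout_ms)
{
	struct pollfd pfd = {
		.fd = fence_fd,
		.events = POLLIN | POLLPRI,
	};

	return poll(&pfd, 1, timeout_ms);
}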



[PATCH v5 13/14] drm/msm: Add wait-boost support

2023-02-20 Thread Rob Clark
From: Rob Clark 

Add a way for various userspace waits to signal urgency.

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/msm/msm_drv.c | 12 
 drivers/gpu/drm/msm/msm_gem.c |  5 +
 include/uapi/drm/msm_drm.h| 14 --
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index aca48c868c14..f6764a86b2da 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -46,6 +46,7 @@
  * - 1.8.0 - Add MSM_BO_CACHED_COHERENT for supported GPUs (a6xx)
  * - 1.9.0 - Add MSM_SUBMIT_FENCE_SN_IN
  * - 1.10.0 - Add MSM_SUBMIT_BO_NO_IMPLICIT
+ * - 1.11.0 - Add wait boost (MSM_WAIT_FENCE_BOOST, MSM_PREP_BOOST)
  */
 #define MSM_VERSION_MAJOR  1
 #define MSM_VERSION_MINOR  10
@@ -899,7 +900,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 }
 
 static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
- ktime_t timeout)
+ ktime_t timeout, uint32_t flags)
 {
struct dma_fence *fence;
int ret;
@@ -929,6 +930,9 @@ static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
if (!fence)
return 0;
 
+   if (flags & MSM_WAIT_FENCE_BOOST)
+   dma_fence_set_deadline(fence, ktime_get());
+
	ret = dma_fence_wait_timeout(fence, true, timeout_to_jiffies(&timeout));
if (ret == 0) {
ret = -ETIMEDOUT;
@@ -949,8 +953,8 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
struct msm_gpu_submitqueue *queue;
int ret;
 
-   if (args->pad) {
-   DRM_ERROR("invalid pad: %08x\n", args->pad);
+   if (args->flags & ~MSM_WAIT_FENCE_FLAGS) {
+   DRM_ERROR("invalid flags: %08x\n", args->flags);
return -EINVAL;
}
 
@@ -961,7 +965,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
if (!queue)
return -ENOENT;
 
-   ret = wait_fence(queue, args->fence, to_ktime(args->timeout));
+	ret = wait_fence(queue, args->fence, to_ktime(args->timeout), args->flags);
 
msm_submitqueue_put(queue);
 
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 1dee0d18abbb..dd4a0d773f6e 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -846,6 +846,11 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout)
op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout);
long ret;
 
+   if (op & MSM_PREP_BOOST) {
+   dma_resv_set_deadline(obj->resv, dma_resv_usage_rw(write),
+ ktime_get());
+   }
+
ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write),
true,  remain);
if (ret == 0)
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 329100016e7c..dbf0d6f43fa9 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -151,8 +151,13 @@ struct drm_msm_gem_info {
 #define MSM_PREP_READ0x01
 #define MSM_PREP_WRITE   0x02
 #define MSM_PREP_NOSYNC  0x04
+#define MSM_PREP_BOOST   0x08
 
-#define MSM_PREP_FLAGS   (MSM_PREP_READ | MSM_PREP_WRITE | MSM_PREP_NOSYNC)
+#define MSM_PREP_FLAGS   (MSM_PREP_READ | \
+ MSM_PREP_WRITE | \
+ MSM_PREP_NOSYNC | \
+ MSM_PREP_BOOST | \
+ 0)
 
 struct drm_msm_gem_cpu_prep {
__u32 handle; /* in */
@@ -286,6 +291,11 @@ struct drm_msm_gem_submit {
 
 };
 
+#define MSM_WAIT_FENCE_BOOST   0x0001
+#define MSM_WAIT_FENCE_FLAGS   ( \
+   MSM_WAIT_FENCE_BOOST | \
+   0)
+
 /* The normal way to synchronize with the GPU is just to CPU_PREP on
  * a buffer if you need to access it from the CPU (other cmdstream
  * submission from same or other contexts, PAGE_FLIP ioctl, etc, all
@@ -295,7 +305,7 @@ struct drm_msm_gem_submit {
  */
 struct drm_msm_wait_fence {
__u32 fence;  /* in */
-   __u32 pad;
+   __u32 flags;  /* in, bitmask of MSM_WAIT_FENCE_x */
struct drm_msm_timespec timeout;   /* in */
__u32 queueid; /* in, submitqueue id */
 };
-- 
2.39.1
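
For illustration, a boosted wait from userspace could look like this (a
hypothetical sketch; only the flag and struct fields come from the
patch):

#include <stdint.h>
#include <xf86drm.h>
#include "msm_drm.h"

/* Wait for a submit fence and ask the kernel to boost GPU frequency if
 * it has not signaled yet. */
static int msm_wait_fence_boosted(int drm_fd, uint32_t queue_id,
				  uint32_t fence,
				  struct drm_msm_timespec timeout)
{
	struct drm_msm_wait_fence req = {
		.fence = fence,
		.flags = MSM_WAIT_FENCE_BOOST,
		.timeout = timeout,
		.queueid = queue_id,
	};

	return drmIoctl(drm_fd, DRM_IOCTL_MSM_WAIT_FENCE, &req);
}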



[PATCH v5 08/14] drm/scheduler: Add fence deadline support

2023-02-20 Thread Rob Clark
As the finished fence is the one that is exposed to userspace, and
therefore the one that other operations, like atomic update, would
block on, we need to propagate the deadline from the finished
fence to the actual hw fence.

v2: Split into drm_sched_fence_set_parent() (ckoenig)
v3: Ensure a thread calling drm_sched_fence_set_deadline_finished() sees
fence->parent set before drm_sched_fence_set_parent() does this
test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT).

Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/scheduler/sched_fence.c | 46 +
 drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
 include/drm/gpu_scheduler.h |  8 +
 3 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index 7fd869520ef2..43e2d4f5fe3b 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -123,6 +123,37 @@ static void drm_sched_fence_release_finished(struct dma_fence *f)
 	dma_fence_put(&fence->scheduled);
 }
 
+static void drm_sched_fence_set_deadline_finished(struct dma_fence *f,
+ ktime_t deadline)
+{
+   struct drm_sched_fence *fence = to_drm_sched_fence(f);
+   struct dma_fence *parent;
+   unsigned long flags;
+
+	spin_lock_irqsave(&fence->lock, flags);
+
+   /* If we already have an earlier deadline, keep it: */
+	if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) &&
+	    ktime_before(fence->deadline, deadline)) {
+		spin_unlock_irqrestore(&fence->lock, flags);
+   return;
+   }
+
+   fence->deadline = deadline;
+	set_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags);
+
+	spin_unlock_irqrestore(&fence->lock, flags);
+
+   /*
+	 * smp_load_acquire() to ensure that if we are racing another
+* thread calling drm_sched_fence_set_parent(), that we see
+* the parent set before it calls test_bit(HAS_DEADLINE_BIT)
+*/
+	parent = smp_load_acquire(&fence->parent);
+   if (parent)
+   dma_fence_set_deadline(parent, deadline);
+}
+
 static const struct dma_fence_ops drm_sched_fence_ops_scheduled = {
.get_driver_name = drm_sched_fence_get_driver_name,
.get_timeline_name = drm_sched_fence_get_timeline_name,
@@ -133,6 +164,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = {
.get_driver_name = drm_sched_fence_get_driver_name,
.get_timeline_name = drm_sched_fence_get_timeline_name,
.release = drm_sched_fence_release_finished,
+   .set_deadline = drm_sched_fence_set_deadline_finished,
 };
 
 struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
@@ -147,6 +179,20 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
 }
 EXPORT_SYMBOL(to_drm_sched_fence);
 
+void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
+   struct dma_fence *fence)
+{
+   /*
+* smp_store_release() to ensure another thread racing us
+* in drm_sched_fence_set_deadline_finished() sees the
+* fence's parent set before test_bit()
+*/
+	smp_store_release(&s_fence->parent, dma_fence_get(fence));
+   if (test_bit(DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
+		     &s_fence->finished.flags))
+   dma_fence_set_deadline(fence, s_fence->deadline);
+}
+
 struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
  void *owner)
 {
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 4e6ad6e122bc..007f98c48f8d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1019,7 +1019,7 @@ static int drm_sched_main(void *param)
drm_sched_fence_scheduled(s_fence);
 
if (!IS_ERR_OR_NULL(fence)) {
-   s_fence->parent = dma_fence_get(fence);
+   drm_sched_fence_set_parent(s_fence, fence);
/* Drop for original kref_init of the fence */
dma_fence_put(fence);
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 9db9e5e504ee..8b31a954a44d 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -280,6 +280,12 @@ struct drm_sched_fence {
  */
struct dma_fencefinished;
 
+   /**
+	 * @deadline: deadline set on &drm_sched_fence.finished which
+	 * potentially needs to be propagated to &drm_sched_fence.parent
+*/
+   ktime_t deadline;
+
 /**
  * @parent: the fence returned by &drm_sched_backend_ops.run_job
  * when scheduling the job on hardware. We signal the
@@ -568,6 +574,8 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,

[PATCH v5 07/14] dma-buf/sw_sync: Add fence deadline support

2023-02-20 Thread Rob Clark
From: Rob Clark 

This consists of simply storing the most recent deadline, and adding an
ioctl to retrieve the deadline.  This can be used in conjunction with
the SET_DEADLINE ioctl on a fence fd for testing, i.e. create various
sw_sync fences, merge them into a fence-array, set a deadline on the
fence-array, and confirm that it is propagated properly to each fence.

v2: Switch UABI to express deadline as u64

Signed-off-by: Rob Clark 
Reviewed-by: Christian König 
---
 drivers/dma-buf/sw_sync.c| 58 
 drivers/dma-buf/sync_debug.h |  2 ++
 2 files changed, 60 insertions(+)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 348b3a9170fa..3e2315ee955b 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -52,12 +52,28 @@ struct sw_sync_create_fence_data {
__s32   fence; /* fd of new fence */
 };
 
+/**
+ * struct sw_sync_get_deadline - get the deadline hint of a sw_sync fence
+ * @deadline_ns: absolute time of the deadline
+ * @pad:   must be zero
+ * @fence_fd:  the sw_sync fence fd (in)
+ *
+ * The timebase for the deadline is CLOCK_MONOTONIC (same as vblank)
+ */
+struct sw_sync_get_deadline {
+   __u64   deadline_ns;
+   __u32   pad;
+   __s32   fence_fd;
+};
+
 #define SW_SYNC_IOC_MAGIC  'W'
 
 #define SW_SYNC_IOC_CREATE_FENCE   _IOWR(SW_SYNC_IOC_MAGIC, 0,\
struct sw_sync_create_fence_data)
 
 #define SW_SYNC_IOC_INC			_IOW(SW_SYNC_IOC_MAGIC, 1, __u32)
+#define SW_SYNC_GET_DEADLINE   _IOWR(SW_SYNC_IOC_MAGIC, 2, \
+   struct sw_sync_get_deadline)
 
 static const struct dma_fence_ops timeline_fence_ops;
 
@@ -171,6 +187,13 @@ static void timeline_fence_timeline_value_str(struct dma_fence *fence,
snprintf(str, size, "%d", parent->value);
 }
 
+static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+   struct sync_pt *pt = dma_fence_to_sync_pt(fence);
+
+   pt->deadline = deadline;
+}
+
 static const struct dma_fence_ops timeline_fence_ops = {
.get_driver_name = timeline_fence_get_driver_name,
.get_timeline_name = timeline_fence_get_timeline_name,
@@ -179,6 +202,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
.release = timeline_fence_release,
.fence_value_str = timeline_fence_value_str,
.timeline_value_str = timeline_fence_timeline_value_str,
+   .set_deadline = timeline_fence_set_deadline,
 };
 
 /**
@@ -387,6 +411,37 @@ static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg)
return 0;
 }
 
+static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long arg)
+{
+   struct sw_sync_get_deadline data;
+   struct dma_fence *fence;
+   struct sync_pt *pt;
+
+	if (copy_from_user(&data, (void __user *)arg, sizeof(data)))
+   return -EFAULT;
+
+   if (data.deadline_ns || data.pad)
+   return -EINVAL;
+
+   fence = sync_file_get_fence(data.fence_fd);
+   if (!fence)
+   return -EINVAL;
+
+   pt = dma_fence_to_sync_pt(fence);
+   if (!pt)
+   return -EINVAL;
+
+
+   data.deadline_ns = ktime_to_ns(pt->deadline);
+
+   dma_fence_put(fence);
+
+	if (copy_to_user((void __user *)arg, &data, sizeof(data)))
+   return -EFAULT;
+
+   return 0;
+}
+
 static long sw_sync_ioctl(struct file *file, unsigned int cmd,
  unsigned long arg)
 {
@@ -399,6 +454,9 @@ static long sw_sync_ioctl(struct file *file, unsigned int cmd,
case SW_SYNC_IOC_INC:
return sw_sync_ioctl_inc(obj, arg);
 
+   case SW_SYNC_GET_DEADLINE:
+   return sw_sync_ioctl_get_deadline(obj, arg);
+
default:
return -ENOTTY;
}
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index 6176e52ba2d7..2e0146d0bdbb 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -55,11 +55,13 @@ static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence)
  * @base: base fence object
  * @link: link on the sync timeline's list
  * @node: node in the sync timeline's tree
+ * @deadline: the most recently set fence deadline
  */
 struct sync_pt {
struct dma_fence base;
struct list_head link;
struct rb_node node;
+   ktime_t deadline;
 };
 
 extern const struct file_operations sw_sync_debugfs_fops;
-- 
2.39.1
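
For illustration, the test flow from the commit message could query a
point's deadline like this (a sketch; the struct mirrors the one in
sw_sync.c above, redefined locally since it is not exported in a uapi
header, and the helper itself is hypothetical):

#include <stdint.h>
#include <sys/ioctl.h>

struct sw_sync_get_deadline {
	uint64_t deadline_ns;
	uint32_t pad;
	int32_t  fence_fd;
};
#define SW_SYNC_IOC_MAGIC	'W'
#define SW_SYNC_GET_DEADLINE	_IOWR(SW_SYNC_IOC_MAGIC, 2, \
				      struct sw_sync_get_deadline)

/* timeline_fd is the sw_sync timeline fd (from debugfs), fence_fd a
 * sync point on it; returns the deadline set via SET_DEADLINE, 0 on
 * error. */
static uint64_t sw_sync_query_deadline(int timeline_fd, int fence_fd)
{
	struct sw_sync_get_deadline d = { .fence_fd = fence_fd };

	if (ioctl(timeline_fd, SW_SYNC_GET_DEADLINE, &d) < 0)
		return 0;
	return d.deadline_ns;
}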



[PATCH v5 04/14] dma-buf/dma-resv: Add a way to set fence deadline

2023-02-20 Thread Rob Clark
From: Rob Clark 

Add a way to set a deadline on remaining resv fences according to the
requested usage.

Signed-off-by: Rob Clark 
Reviewed-by: Christian König 
---
 drivers/dma-buf/dma-resv.c | 22 ++
 include/linux/dma-resv.h   |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 1c76aed8e262..2a594b754af1 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -684,6 +684,28 @@ long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
 }
 EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
 
+/**
+ * dma_resv_set_deadline - Set a deadline on reservation's objects fences
+ * @obj: the reservation object
+ * @usage: controls which fences to include, see enum dma_resv_usage.
+ * @deadline: the requested deadline (MONOTONIC)
+ *
+ * May be called without holding the dma_resv lock.  Sets @deadline on
+ * all fences filtered by @usage.
+ */
+void dma_resv_set_deadline(struct dma_resv *obj, enum dma_resv_usage usage,
+  ktime_t deadline)
+{
+   struct dma_resv_iter cursor;
+   struct dma_fence *fence;
+
+	dma_resv_iter_begin(&cursor, obj, usage);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		dma_fence_set_deadline(fence, deadline);
+	}
+	dma_resv_iter_end(&cursor);
+}
+EXPORT_SYMBOL_GPL(dma_resv_set_deadline);
 
 /**
  * dma_resv_test_signaled - Test if a reservation object's fences have been
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 0637659a702c..8d0e34dad446 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -479,6 +479,8 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
 int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
 long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
   bool intr, unsigned long timeout);
+void dma_resv_set_deadline(struct dma_resv *obj, enum dma_resv_usage usage,
+  ktime_t deadline);
 bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage);
 void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq);
 
-- 
2.39.1



[PATCH v5 11/14] drm/atomic-helper: Set fence deadline for vblank

2023-02-20 Thread Rob Clark
From: Rob Clark 

For an atomic commit updating a single CRTC (ie. a pageflip) calculate
the next vblank time, and inform the fence(s) of that deadline.

v2: Comment typo fix (danvet)

Signed-off-by: Rob Clark 
Reviewed-by: Daniel Vetter 
Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/drm_atomic_helper.c | 36 +
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index d579fd8f7cb8..35a4dc714920 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1511,6 +1511,40 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev,
 }
 EXPORT_SYMBOL(drm_atomic_helper_commit_modeset_enables);
 
+/*
+ * For atomic updates which touch just a single CRTC, calculate the time of the
+ * next vblank, and inform all the fences of the deadline.
+ */
+static void set_fence_deadline(struct drm_device *dev,
+  struct drm_atomic_state *state)
+{
+   struct drm_crtc *crtc, *wait_crtc = NULL;
+   struct drm_crtc_state *new_crtc_state;
+   struct drm_plane *plane;
+   struct drm_plane_state *new_plane_state;
+   ktime_t vbltime;
+   int i;
+
+   for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) {
+   if (wait_crtc)
+   return;
+   wait_crtc = crtc;
+   }
+
+   /* If no CRTCs updated, then nothing to do: */
+   if (!wait_crtc)
+   return;
+
+	if (drm_crtc_next_vblank_time(wait_crtc, &vbltime))
+   return;
+
+   for_each_new_plane_in_state (state, plane, new_plane_state, i) {
+   if (!new_plane_state->fence)
+   continue;
+   dma_fence_set_deadline(new_plane_state->fence, vbltime);
+   }
+}
+
 /**
  * drm_atomic_helper_wait_for_fences - wait for fences stashed in plane state
  * @dev: DRM device
@@ -1540,6 +1574,8 @@ int drm_atomic_helper_wait_for_fences(struct drm_device *dev,
struct drm_plane_state *new_plane_state;
int i, ret;
 
+   set_fence_deadline(dev, state);
+
for_each_new_plane_in_state(state, plane, new_plane_state, i) {
if (!new_plane_state->fence)
continue;
-- 
2.39.1



[PATCH v5 05/14] dma-buf/sync_file: Add SET_DEADLINE ioctl

2023-02-20 Thread Rob Clark
From: Rob Clark 

The initial purpose is for igt tests, but this would also be useful for
compositors that wait until close to vblank deadline to make decisions
about which frame to show.

The igt tests can be found at:

https://gitlab.freedesktop.org/robclark/igt-gpu-tools/-/commits/fence-deadline

v2: Clarify the timebase, add link to igt tests
v3: Use u64 value in ns to express deadline.

Signed-off-by: Rob Clark 
---
 drivers/dma-buf/sync_file.c| 19 +++
 include/uapi/linux/sync_file.h | 23 +++
 2 files changed, 42 insertions(+)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86ce..418021cfb87c 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -350,6 +350,22 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
return ret;
 }
 
+static int sync_file_ioctl_set_deadline(struct sync_file *sync_file,
+   unsigned long arg)
+{
+   struct sync_set_deadline ts;
+
+	if (copy_from_user(&ts, (void __user *)arg, sizeof(ts)))
+   return -EFAULT;
+
+   if (ts.pad)
+   return -EINVAL;
+
+   dma_fence_set_deadline(sync_file->fence, ns_to_ktime(ts.deadline_ns));
+
+   return 0;
+}
+
 static long sync_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
 {
@@ -362,6 +378,9 @@ static long sync_file_ioctl(struct file *file, unsigned int 
cmd,
case SYNC_IOC_FILE_INFO:
return sync_file_ioctl_fence_info(sync_file, arg);
 
+   case SYNC_IOC_SET_DEADLINE:
+   return sync_file_ioctl_set_deadline(sync_file, arg);
+
default:
return -ENOTTY;
}
diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h
index ee2dcfb3d660..6d2ad4addf1b 100644
--- a/include/uapi/linux/sync_file.h
+++ b/include/uapi/linux/sync_file.h
@@ -67,6 +67,21 @@ struct sync_file_info {
__u64   sync_fence_info;
 };
 
+/**
+ * struct sync_set_deadline - set a deadline hint on a fence
+ * @deadline_ns: absolute time of the deadline
+ * @pad:   must be zero
+ *
+ * The timebase for the deadline is CLOCK_MONOTONIC (same as vblank)
+ */
+struct sync_set_deadline {
+   __u64   deadline_ns;
+   /* Not strictly needed for alignment but gives some possibility
+* for future extension:
+*/
+   __u64   pad;
+};
+
 #define SYNC_IOC_MAGIC '>'
 
 /**
@@ -95,4 +110,12 @@ struct sync_file_info {
  */
 #define SYNC_IOC_FILE_INFO _IOWR(SYNC_IOC_MAGIC, 4, struct sync_file_info)
 
+
+/**
+ * DOC: SYNC_IOC_SET_DEADLINE - set a deadline on a fence
+ *
+ * Allows userspace to set a deadline on a fence, see dma_fence_set_deadline()
+ */
+#define SYNC_IOC_SET_DEADLINE  _IOW(SYNC_IOC_MAGIC, 5, struct 
sync_set_deadline)
+
 #endif /* _UAPI_LINUX_SYNC_H */
-- 
2.39.1
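
For reference, a userspace caller would drive the new ioctl roughly like
this (a sketch against the UAPI above; the helper name and the fixed
offset are illustrative, not part of the patch, and <linux/sync_file.h>
only gains the struct once this lands):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <time.h>
#include <linux/sync_file.h>

static int set_fence_deadline(int fence_fd, uint64_t ns_from_now)
{
        struct sync_set_deadline args;
        struct timespec ts;

        /* same timebase as vblank */
        clock_gettime(CLOCK_MONOTONIC, &ts);

        memset(&args, 0, sizeof(args));
        args.deadline_ns = (uint64_t)ts.tv_sec * 1000000000ull +
                           (uint64_t)ts.tv_nsec + ns_from_now;

        return ioctl(fence_fd, SYNC_IOC_SET_DEADLINE, &args);
}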



[PATCH v5 02/14] dma-buf/fence-array: Add fence deadline support

2023-02-20 Thread Rob Clark
From: Rob Clark 

Propagate the deadline to all the fences in the array.

Signed-off-by: Rob Clark 
Reviewed-by: Christian König 
---
 drivers/dma-buf/dma-fence-array.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/dma-buf/dma-fence-array.c 
b/drivers/dma-buf/dma-fence-array.c
index 5c8a7084577b..9b3ce8948351 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -123,12 +123,23 @@ static void dma_fence_array_release(struct dma_fence 
*fence)
dma_fence_free(fence);
 }
 
+static void dma_fence_array_set_deadline(struct dma_fence *fence,
+ktime_t deadline)
+{
+   struct dma_fence_array *array = to_dma_fence_array(fence);
+   unsigned i;
+
+   for (i = 0; i < array->num_fences; ++i)
+   dma_fence_set_deadline(array->fences[i], deadline);
+}
+
 const struct dma_fence_ops dma_fence_array_ops = {
.get_driver_name = dma_fence_array_get_driver_name,
.get_timeline_name = dma_fence_array_get_timeline_name,
.enable_signaling = dma_fence_array_enable_signaling,
.signaled = dma_fence_array_signaled,
.release = dma_fence_array_release,
+   .set_deadline = dma_fence_array_set_deadline,
 };
 EXPORT_SYMBOL(dma_fence_array_ops);
 
-- 
2.39.1



[PATCH v5 03/14] dma-buf/fence-chain: Add fence deadline support

2023-02-20 Thread Rob Clark
From: Rob Clark 

Propagate the deadline to all the fences in the chain.

Signed-off-by: Rob Clark 
Reviewed-by: Christian König  for this one.
---
 drivers/dma-buf/dma-fence-chain.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/dma-buf/dma-fence-chain.c 
b/drivers/dma-buf/dma-fence-chain.c
index a0d920576ba6..4684874af612 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -206,6 +206,18 @@ static void dma_fence_chain_release(struct dma_fence 
*fence)
dma_fence_free(fence);
 }
 
+
+static void dma_fence_chain_set_deadline(struct dma_fence *fence,
+ktime_t deadline)
+{
+   dma_fence_chain_for_each(fence, fence) {
+   struct dma_fence_chain *chain = to_dma_fence_chain(fence);
+   struct dma_fence *f = chain ? chain->fence : fence;
+
+   dma_fence_set_deadline(f, deadline);
+   }
+}
+
 const struct dma_fence_ops dma_fence_chain_ops = {
.use_64bit_seqno = true,
.get_driver_name = dma_fence_chain_get_driver_name,
@@ -213,6 +225,7 @@ const struct dma_fence_ops dma_fence_chain_ops = {
.enable_signaling = dma_fence_chain_enable_signaling,
.signaled = dma_fence_chain_signaled,
.release = dma_fence_chain_release,
+   .set_deadline = dma_fence_chain_set_deadline,
 };
 EXPORT_SYMBOL(dma_fence_chain_ops);
 
-- 
2.39.1



[PATCH v5 01/14] dma-buf/dma-fence: Add deadline awareness

2023-02-20 Thread Rob Clark
From: Rob Clark 

Add a way to hint to the fence signaler of an upcoming deadline, such as
vblank, which the fence waiter would prefer not to miss.  This is to aid
the fence signaler in making power management decisions, like boosting
frequency as the deadline approaches, and awareness of missing deadlines
so that they can be factored into the frequency scaling.

v2: Drop dma_fence::deadline and related logic to filter duplicate
deadlines, to avoid increasing dma_fence size.  The fence-context
implementation will need similar logic to track deadlines of all
the fences on the same timeline.  [ckoenig]
v3: Clarify locking wrt. set_deadline callback
v4: Clarify in docs comment that this is a hint

Signed-off-by: Rob Clark 
Reviewed-by: Christian König 
---
 drivers/dma-buf/dma-fence.c | 21 +
 include/linux/dma-fence.h   | 20 
 2 files changed, 41 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 0de0482cd36e..e3331761384c 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -912,6 +912,27 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, 
uint32_t count,
 }
 EXPORT_SYMBOL(dma_fence_wait_any_timeout);
 
+
+/**
+ * dma_fence_set_deadline - set desired fence-wait deadline
+ * @fence:    the fence that is to be waited on
+ * @deadline: the time by which the waiter hopes for the fence to be
+ *            signaled
+ *
+ * Give the fence signaler a hint about an upcoming deadline, such as
+ * vblank, by which point the waiter would prefer the fence to be
+ * signaled.  This is intended to give feedback to the fence signaler
+ * to aid in power management decisions, such as boosting GPU frequency
+ * if a periodic vblank deadline is approaching but the fence is not
+ * yet signaled.
+ */
+void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+   if (fence->ops->set_deadline && !dma_fence_is_signaled(fence))
+   fence->ops->set_deadline(fence, deadline);
+}
+EXPORT_SYMBOL(dma_fence_set_deadline);
+
 /**
  * dma_fence_describe - Dump fence description into seq_file
  * @fence: the fence to describe
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 775cdc0b4f24..d77f6591c453 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -99,6 +99,7 @@ enum dma_fence_flag_bits {
DMA_FENCE_FLAG_SIGNALED_BIT,
DMA_FENCE_FLAG_TIMESTAMP_BIT,
DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+   DMA_FENCE_FLAG_HAS_DEADLINE_BIT,
DMA_FENCE_FLAG_USER_BITS, /* must always be last member */
 };
 
@@ -257,6 +258,23 @@ struct dma_fence_ops {
 */
void (*timeline_value_str)(struct dma_fence *fence,
   char *str, int size);
+
+   /**
+* @set_deadline:
+*
+* Callback to allow a fence waiter to inform the fence signaler of
+* an upcoming deadline, such as vblank, by which point the waiter
+* would prefer the fence to be signaled.  This is intended to
+* give feedback to the fence signaler to aid in power management
+* decisions, such as boosting GPU frequency.
+*
+* This is called without &dma_fence.lock held, it can be called
+* multiple times and from any context.  Locking is up to the callee
+* if it has some state to manage.
+*
+* This callback is optional.
+*/
+   void (*set_deadline)(struct dma_fence *fence, ktime_t deadline);
 };
 
 void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
@@ -583,6 +601,8 @@ static inline signed long dma_fence_wait(struct dma_fence 
*fence, bool intr)
return ret < 0 ? ret : 0;
 }
 
+void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
+
 struct dma_fence *dma_fence_get_stub(void);
 struct dma_fence *dma_fence_allocate_private_stub(void);
 u64 dma_fence_context_alloc(unsigned num);
-- 
2.39.1
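
To sketch the signaler side (everything named my_* below is hypothetical,
not from this series): since the callback can run in any context and may
fire multiple times, a driver would typically just note the hint and defer
the actual frequency decision to a worker.

#include <linux/dma-fence.h>
#include <linux/ktime.h>
#include <linux/workqueue.h>

struct my_fence {
        struct dma_fence base;
        struct work_struct boost_work;  /* bumps GPU freq, not shown */
};

static const char *my_fence_get_driver_name(struct dma_fence *fence)
{
        return "my-driver";
}

static const char *my_fence_get_timeline_name(struct dma_fence *fence)
{
        return "my-timeline";
}

static void my_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
{
        struct my_fence *f = container_of(fence, struct my_fence, base);

        /* called without the fence lock, possibly repeatedly */
        if (ktime_before(deadline, ktime_add_ms(ktime_get(), 10)))
                schedule_work(&f->boost_work);
}

static const struct dma_fence_ops my_fence_ops = {
        .get_driver_name = my_fence_get_driver_name,
        .get_timeline_name = my_fence_get_timeline_name,
        .set_deadline = my_fence_set_deadline,
};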



[PATCH v5 00/14] dma-fence: Deadline awareness

2023-02-20 Thread Rob Clark
From: Rob Clark 

This series adds a deadline hint to fences, so realtime deadlines
such as vblank can be communicated to the fence signaller for power/
frequency management decisions.

This is partially inspired by a trick i915 does, but implemented
via dma-fence for a couple of reasons:

1) To continue to be able to use the atomic helpers
2) To support cases where display and gpu are different drivers

This iteration adds a dma-fence ioctl to set a deadline (both to
support igt-tests, and compositors which delay decisions about which
client buffer to display), and a sw_sync ioctl to read back the
deadline.  IGT tests utilizing these can be found at:

  https://gitlab.freedesktop.org/robclark/igt-gpu-tools/-/commits/fence-deadline


v1: https://patchwork.freedesktop.org/series/93035/
v2: Move filtering out of later deadlines to fence implementation
to avoid increasing the size of dma_fence
v3: Add support in fence-array and fence-chain; Add some uabi to
support igt tests and userspace compositors.
v4: Rebase, address various comments, and add syncobj deadline
support, and sync_file EPOLLPRI based on experience with perf/
freq issues with clvk compute workloads on i915 (anv)
v5: Clarify that this is a hint as opposed to a more hard deadline
guarantee, switch to using u64 ns values in UABI (still absolute
CLOCK_MONOTONIC values), drop syncobj related cap and driver
feature flag in favor of allowing count_handles==0 for probing
kernel support.

Rob Clark (14):
  dma-buf/dma-fence: Add deadline awareness
  dma-buf/fence-array: Add fence deadline support
  dma-buf/fence-chain: Add fence deadline support
  dma-buf/dma-resv: Add a way to set fence deadline
  dma-buf/sync_file: Add SET_DEADLINE ioctl
  dma-buf/sync_file: Support (E)POLLPRI
  dma-buf/sw_sync: Add fence deadline support
  drm/scheduler: Add fence deadline support
  drm/syncobj: Add deadline support for syncobj waits
  drm/vblank: Add helper to get next vblank time
  drm/atomic-helper: Set fence deadline for vblank
  drm/msm: Add deadline based boost support
  drm/msm: Add wait-boost support
  drm/i915: Add deadline based boost support

 drivers/dma-buf/dma-fence-array.c   | 11 
 drivers/dma-buf/dma-fence-chain.c   | 13 +
 drivers/dma-buf/dma-fence.c | 21 +++
 drivers/dma-buf/dma-resv.c  | 22 
 drivers/dma-buf/sw_sync.c   | 58 +++
 drivers/dma-buf/sync_debug.h|  2 +
 drivers/dma-buf/sync_file.c | 27 +
 drivers/gpu/drm/drm_atomic_helper.c | 36 
 drivers/gpu/drm/drm_syncobj.c   | 59 +++-
 drivers/gpu/drm/drm_vblank.c| 32 +++
 drivers/gpu/drm/i915/i915_request.c | 20 +++
 drivers/gpu/drm/msm/msm_drv.c   | 12 ++--
 drivers/gpu/drm/msm/msm_fence.c | 74 +
 drivers/gpu/drm/msm/msm_fence.h | 20 +++
 drivers/gpu/drm/msm/msm_gem.c   |  5 ++
 drivers/gpu/drm/scheduler/sched_fence.c | 46 +++
 drivers/gpu/drm/scheduler/sched_main.c  |  2 +-
 include/drm/drm_vblank.h|  1 +
 include/drm/gpu_scheduler.h |  8 +++
 include/linux/dma-fence.h   | 20 +++
 include/linux/dma-resv.h|  2 +
 include/uapi/drm/drm.h  |  5 ++
 include/uapi/drm/msm_drm.h  | 14 -
 include/uapi/linux/sync_file.h  | 23 
 24 files changed, 513 insertions(+), 20 deletions(-)

-- 
2.39.1
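
On the v5 probing scheme mentioned above, userspace could detect kernel
support roughly as follows. This is only a sketch: the flag name
(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE) is an assumption based on the
series' naming and could change before merge, and the header path may
vary with the libdrm install.

#include <stdbool.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static bool syncobj_deadline_supported(int drm_fd)
{
        struct drm_syncobj_wait wait;

        memset(&wait, 0, sizeof(wait));
        wait.count_handles = 0;  /* probe: no handles, just the flag */
        wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE;

        /* older kernels reject count_handles == 0 with -EINVAL */
        return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait) == 0;
}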



Re: [PATCH drm-next v2 04/16] maple_tree: add flag MT_FLAGS_LOCK_NONE

2023-02-20 Thread Danilo Krummrich

On 2/20/23 16:10, Matthew Wilcox wrote:

On Mon, Feb 20, 2023 at 03:00:59PM +0100, Danilo Krummrich wrote:

On 2/17/23 20:38, Matthew Wilcox wrote:

On Fri, Feb 17, 2023 at 02:44:10PM +0100, Danilo Krummrich wrote:

Generic components making use of the maple tree (such as the
DRM GPUVA Manager) delegate the responsibility of ensuring mutual
exclusion to their users.

While such components could inherit the concept of an external lock,
some users might just serialize the access to the component and hence to
the internal maple tree.

In order to allow such use cases, add a new flag MT_FLAGS_LOCK_NONE to
indicate not to do any internal lockdep checks.


I'm really against this change.

First, we really should check that users have their locking right.
It's bitten us so many times when they get it wrong.


In case of the DRM GPUVA manager, some users might serialize the access to
the GPUVA manager and hence to it's maple tree instances, e.g. through the
drm_gpu_scheduler. In such a case ensuring to hold a lock would be a bit
pointless and I wouldn't really know how to "sell" this to potential users
of the GPUVA manager.


This is why we like people to use the spinlock embedded in the tree.
There's nothing for the user to care about.  If the access really is
serialised, acquiring/releasing the uncontended spinlock is a minimal
cost compared to all the other things that will happen while modifying
the tree.


I think as for the users of the GPUVA manager we'd have two cases:

1) Accesses to the manager (and hence the tree) are serialized, no lock 
needed.


2) Multiple operations on the tree must be locked in order to make them 
appear atomic.


In either case the embedded spinlock wouldn't be useful, we'd either 
need an external lock or no lock at all.


If there are any internal reasons why specific tree operations must be 
mutually excluded (such as those you explain below), wouldn't it make 
more sense to always have the internal lock and, optionally, allow users 
to specify an external lock additionally?





Second, having a lock allows us to defragment the slab cache.  The
patches to do that haven't gone anywhere recently, but if we drop the
requirement now, we'll never be able to compact ranges of memory that
have slabs allocated to them.



Not sure if I get that, do you mind explaining a bit how this would affect
other users of the maple tree, such as my use case, the GPUVA manager?


When we want to free a slab in order to defragment memory, we need
to relocate all the objects allocated within that slab.  To do that
for the maple tree node cache, for each node in this particular slab,
we'll need to walk up to the top of the tree and lock it.  We can then
allocate a new node from a different slab, change the parent to point
to the new node and drop the lock.  After an RCU delay, we can free the
slab and create a larger contiguous block of memory.

As I said, this is somewhat hypothetical in that there's no current
code in the tree to reclaim slabs when we're trying to defragment
memory.  And that's because it's hard to do.  The XArray and maple
tree were designed to make it possible for their slabs.
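
To make the trade-off concrete, a fully serialized user leaning on the
embedded lock needs no locking code of its own; a rough sketch (the va_*
names are illustrative, only the mtree_*() calls are real API):

#include <linux/gfp.h>
#include <linux/maple_tree.h>

static DEFINE_MTREE(va_mtree);

/* mtree_store_range() takes and drops the tree's internal spinlock,
 * so a serialized caller has nothing extra to do: */
static int va_insert(unsigned long start, unsigned long last, void *va)
{
        return mtree_store_range(&va_mtree, start, last, va, GFP_KERNEL);
}

/* lookups walk the tree under RCU and need no lock from the caller: */
static void *va_lookup(unsigned long addr)
{
        return mtree_load(&va_mtree, addr);
}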





Re: [PATCH v2] drm/msm: DEVFREQ_GOV_SIMPLE_ONDEMAND is no longer needed

2023-02-20 Thread Rob Clark
On Sun, Feb 19, 2023 at 5:04 PM Randy Dunlap  wrote:
>
> DRM_MSM no longer needs DEVFREQ_GOV_SIMPLE_ONDEMAND (since dbd7a2a941b8
> in linux-next: PM / devfreq: Fix build issues with devfreq disabled),
> so remove that select from the DRM_MSM Kconfig file.
>
> Fixes: 6563f60f14cb ("drm/msm/gpu: Add devfreq tuning debugfs")
> Signed-off-by: Randy Dunlap 
> Cc: Rob Clark 
> Cc: Abhinav Kumar 
> Cc: Dmitry Baryshkov 
> Cc: Sean Paul 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Cc: linux-arm-...@vger.kernel.org
> Cc: dri-devel@lists.freedesktop.org
> Cc: freedr...@lists.freedesktop.org

Thanks

Reviewed-by: Rob Clark 

> ---
> v2: since  has been patched, this select is no longer
> needed (Rob Clark)
>
>  drivers/gpu/drm/msm/Kconfig |1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
> --- a/drivers/gpu/drm/msm/Kconfig
> +++ b/drivers/gpu/drm/msm/Kconfig
> @@ -23,7 +23,6 @@ config DRM_MSM
> select SHMEM
> select TMPFS
> select QCOM_SCM
> -   select DEVFREQ_GOV_SIMPLE_ONDEMAND
> select WANT_DEV_COREDUMP
> select SND_SOC_HDMI_CODEC if SND_SOC
> select SYNC_FILE


Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 8:51 AM Tvrtko Ursulin
 wrote:
>
>
> On 20/02/2023 16:44, Tvrtko Ursulin wrote:
> >
> > On 20/02/2023 15:52, Rob Clark wrote:
> >> On Mon, Feb 20, 2023 at 3:33 AM Tvrtko Ursulin
> >>  wrote:
> >>>
> >>>
> >>> On 17/02/2023 20:45, Rodrigo Vivi wrote:
> >
> > [snip]
> >
> >>> Yeah I agree. And as not all media use cases are the same, as are not
> >>> all compute contexts someone somewhere will need to run a series of
> >>> workloads for power and performance numbers. Ideally that someone would
> >>> be the entity for which it makes sense to look at all use cases, from
> >>> server room to client, 3d, media and compute for both. If we could get
> >>> the capability to run this in some automated fashion, akin to CI, we
> >>> would even have a chance to keep making good decisions in the future.
> >>>
> >>> Or we do some one off testing for this instance, but we still need a
> >>> range of workloads and parts to do it properly..
> >>>
> > I also think the "arms race" scenario isn't really as much of a
> > problem as you think.  There aren't _that_ many things using the GPU
> > at the same time (compared to # of things using CPU).   And a lot of
> > mobile games throttle framerate to avoid draining your battery too
> > quickly (after all, if your battery is dead you can't keep buying loot
> > boxes or whatever).
> 
>  Very good point.
> >>>
> >>> On this one I still disagree from the point of view that it does not
> >>> make it good uapi if we allow everyone to select themselves for priority
> >>> handling (one flavour or the other).
> >>
> >> There is plenty of precedent for userspace giving hints to the kernel
> >> about scheduling and freq mgmt.  Like schedutil uclamp stuff.
> >> Although I think that is all based on cgroups.
> >
> > I knew about SCHED_DEADLINE and that it requires CAP_SYS_NICE, but I did
> > not know about uclamp. Quick experiment with uclampset suggests it
> > indeed does not require elevated privilege. If that is indeed so, it is
> > good enough for me as a precedent.
> >
> > It appears to work using sched_setscheduler so maybe could define
> > something similar in i915/xe, per context or per client, not sure.
> >
> > Maybe it would start as a primitive implementation but the uapi would
> > not preclude making it smart(er) afterwards. Or passing along to GuC to
> > do its thing with it.
>
> Hmmm having said that, how would we fix clvk performance using that? We
> would either need the library to do a new step when creating contexts,
> or allow external control so outside entity can do it. And then the
> question is based on what it decides to do it? Is it possible to know
> which, for instance, Chrome tab will be (or is) using clvk so that tab
> management code does it?

I am not sure.. the clvk usage is, I think, not actually in chrome
itself, but something camera related?

Presumably we could build some cgroup knobs to control how the driver
reacts to the "deadline" hints (ie. ignore them completely, or impose
some upper limit on how much freq boost will be applied, etc).  I
think this sort of control of how the driver responds to hints
probably fits best with cgroups, as that is how we are already
implementing similar tuning for cpufreq/sched.  (Ie. foreground app or
tab gets moved to a different cgroup.)  But admittedly I haven't
looked too closely at how cgroups work on the kernel side.

BR,
-R

> Regards,
>
> Tvrtko
>
> >> In the fence/syncobj case, I think we need per-wait hints.. because
> >> for a single process the driver will be doing both housekeeping waits
> >> and potentially urgent waits.  There may also be some room for some
> >> cgroup or similar knobs to control things like what max priority an
> >> app can ask for, and whether or how aggressively the kernel responds
> >> to the "deadline" hints.  So as far as "arms race", I don't think I'd
> >
> > Per wait hints are okay I guess even with "I am important" in their name
> > if sched_setscheduler allows raising uclamp.min just like that. In which
> > case cgroup limits to mimic cpu uclamp also make sense.
> >
> >> change anything about my "fence deadline" proposal.. but that it might
> >> just be one piece of the overall puzzle.
> >
> > That SCHED_DEADLINE requires CAP_SYS_NICE does not worry you?
> >
> > Regards,
> >
> > Tvrtko
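
For anyone wanting to reproduce the uclamp experiment mentioned above,
uclampset boils down to an unprivileged sched_setattr() call, roughly
like this (a sketch; the helper name is illustrative):

#define _GNU_SOURCE
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sched.h>
#include <linux/sched/types.h>

static int raise_uclamp_min(uint32_t min)  /* min in 0..1024 */
{
        struct sched_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        /* keep policy/nice as-is, only adjust the clamp */
        attr.sched_flags = SCHED_FLAG_KEEP_ALL | SCHED_FLAG_UTIL_CLAMP_MIN;
        attr.sched_util_min = min;

        return syscall(SYS_sched_setattr, 0, &attr, 0);
}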


Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 8:44 AM Tvrtko Ursulin
 wrote:
>
>
> On 20/02/2023 15:52, Rob Clark wrote:
> > On Mon, Feb 20, 2023 at 3:33 AM Tvrtko Ursulin
> >  wrote:
> >>
> >>
> >> On 17/02/2023 20:45, Rodrigo Vivi wrote:
>
> [snip]
>
> >> Yeah I agree. And as not all media use cases are the same, as are not
> >> all compute contexts someone somewhere will need to run a series of
> >> workloads for power and performance numbers. Ideally that someone would
> >> be the entity for which it makes sense to look at all use cases, from
> >> server room to client, 3d, media and compute for both. If we could get
> >> the capability to run this in some automated fashion, akin to CI, we
> >> would even have a chance to keep making good decisions in the future.
> >>
> >> Or we do some one off testing for this instance, but we still need a
> >> range of workloads and parts to do it properly..
> >>
>  I also think the "arms race" scenario isn't really as much of a
>  problem as you think.  There aren't _that_ many things using the GPU
>  at the same time (compared to # of things using CPU).   And a lot of
>  mobile games throttle framerate to avoid draining your battery too
>  quickly (after all, if your battery is dead you can't keep buying loot
>  boxes or whatever).
> >>>
> >>> Very good point.
> >>
> >> On this one I still disagree from the point of view that it does not
> >> make it good uapi if we allow everyone to select themselves for priority
> >> handling (one flavour or the other).
> >
> > There is plenty of precedent for userspace giving hints to the kernel
> > about scheduling and freq mgmt.  Like schedutil uclamp stuff.
> > Although I think that is all based on cgroups.
>
> I knew about SCHED_DEADLINE and that it requires CAP_SYS_NICE, but I did
> not know about uclamp. Quick experiment with uclampset suggests it
> indeed does not require elevated privilege. If that is indeed so, it is
> good enough for me as a precedent.
>
> It appears to work using sched_setscheduler so maybe could define
> something similar in i915/xe, per context or per client, not sure.
>
> Maybe it would start as a primitive implementation but the uapi would
> not preclude making it smart(er) afterwards. Or passing along to GuC to
> do its thing with it.
>
> > In the fence/syncobj case, I think we need per-wait hints.. because
> > for a single process the driver will be doing both housekeeping waits
> > and potentially urgent waits.  There may also be some room for some
> > cgroup or similar knobs to control things like what max priority an
> > app can ask for, and whether or how aggressively the kernel responds
> > to the "deadline" hints.  So as far as "arms race", I don't think I'd
>
> Per wait hints are okay I guess even with "I am important" in their name
> if sched_setscheduler allows raising uclamp.min just like that. In which
> case cgroup limits to mimic cpu uclamp also make sense.
>
> > change anything about my "fence deadline" proposal.. but that it might
> > just be one piece of the overall puzzle.
>
> That SCHED_DEADLINE requires CAP_SYS_NICE does not worry you?

This gets to why the name "fence deadline" is perhaps not the best..
it really isn't meant to be analogous to SCHED_DEADLINE, but rather
just a hint to the driver about what userspace is doing.  Maybe we
just document it more strongly as a hint?

BR,
-R

> Regards,
>
> Tvrtko


Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Tvrtko Ursulin



On 20/02/2023 16:44, Tvrtko Ursulin wrote:


On 20/02/2023 15:52, Rob Clark wrote:

On Mon, Feb 20, 2023 at 3:33 AM Tvrtko Ursulin
 wrote:



On 17/02/2023 20:45, Rodrigo Vivi wrote:


[snip]


Yeah I agree. And as not all media use cases are the same, as are not
all compute contexts someone somewhere will need to run a series of
workloads for power and performance numbers. Ideally that someone would
be the entity for which it makes sense to look at all use cases, from
server room to client, 3d, media and compute for both. If we could get
the capability to run this in some automated fashion, akin to CI, we
would even have a chance to keep making good decisions in the future.

Or we do some one off testing for this instance, but we still need a
range of workloads and parts to do it properly..


I also think the "arms race" scenario isn't really as much of a
problem as you think.  There aren't _that_ many things using the GPU
at the same time (compared to # of things using CPU).   And a lot of
mobile games throttle framerate to avoid draining your battery too
quickly (after all, if your battery is dead you can't keep buying loot
boxes or whatever).


Very good point.


On this one I still disagree from the point of view that it does not
make it good uapi if we allow everyone to select themselves for priority
handling (one flavour or the other).


There is plenty of precedent for userspace giving hints to the kernel
about scheduling and freq mgmt.  Like schedutil uclamp stuff.
Although I think that is all based on cgroups.


I knew about SCHED_DEADLINE and that it requires CAP_SYS_NICE, but I did 
not know about uclamp. Quick experiment with uclampset suggests it 
indeed does not require elevated privilege. If that is indeed so, it is 
good enough for me as a precedent.


It appears to work using sched_setscheduler so maybe could define 
something similar in i915/xe, per context or per client, not sure.


Maybe it would start as a primitive implementation but the uapi would 
not preclude making it smart(er) afterwards. Or passing along to GuC to 
do its thing with it.


Hmmm having said that, how would we fix clvk performance using that? We 
would either need the library to do a new step when creating contexts, 
or allow external control so outside entity can do it. And then the 
question is based on what it decides to do it? Is it possible to know 
which, for instance, Chrome tab will be (or is) using clvk so that tab 
management code does it?


Regards,

Tvrtko


In the fence/syncobj case, I think we need per-wait hints.. because
for a single process the driver will be doing both housekeeping waits
and potentially urgent waits.  There may also be some room for some
cgroup or similar knobs to control things like what max priority an
app can ask for, and whether or how aggressively the kernel responds
to the "deadline" hints.  So as far as "arms race", I don't think I'd


Per wait hints are okay I guess even with "I am important" in their name 
if sched_setscheduler allows raising uclamp.min just like that. In which 
case cgroup limits to mimic cpu uclamp also make sense.



change anything about my "fence deadline" proposal.. but that it might
just be one piece of the overall puzzle.


That SCHED_DEADLINE requires CAP_SYS_NICE does not worry you?

Regards,

Tvrtko


[PATCH v4] drm: add kms driver for loongson display controller

2023-02-20 Thread suijingfeng
From: suijingfeng 

The Loongson display controller IP has been integrated in both Loongson
North Bridge chipsets (LS7A1000 and LS7A2000) and Loongson SoCs (LS2K1000,
LS2K2000 etc); it has even been included in Loongson BMC products.

The display controller is a PCI device with two display pipes.
For the DC in LS7A1000 and LS2K1000, each pipe has a DVO output interface
which provides RGB888 signals, vertical & horizontal synchronisation,
and the pixel clock. Each CRTC is able to support 1920x1080@60Hz;
the maximum resolution is 2048x2048 according to the hardware spec.

For the DC in LS7A2000, each display pipe is equipped with a built-in
HDMI encoder which is compliant with the HDMI 1.4 specification, so it
supports 3840x2160@30Hz. The first display pipe is also equipped with
a transparent VGA encoder which is parallel with the HDMI encoder.
The DC in LS7A2000 is more complete; besides the above features, it has
two hardware cursors, two hardware vblank counters and two scanout
position recorders.

 v1 -> v2:
  1) Use hpd status reg when polling for ls7a2000
  2) Fix all warnings that emerged when compiling with W=1

 v2 -> v3:
  1) Add COMPILE_TEST in Kconfig and make the driver off by default
  2) Alphabetical sorting headers
  3) Untangle register access functions as much as possible
  4) Switch to TTM based memory manager and prefer cached mapping
 for Loongson SoC
  5) Add chip id detection method, now all models are distinguishable.
  6) Revise built-in HDMI phy driver; nearly all mainstream modes
     below 4K@30Hz are tested. This driver supports these modes very
     well, including clone and extended display modes.

  v3 -> v4
  1) Quickly fix a small mistake.

  As a basic KMS 2D driver, the user experience is good enough when
  using the X server under the MATE and Xfce desktop environments. This
  driver is ready to be merged, and I will take responsibility if any
  bugs happen.

Signed-off-by: Li Yi 
Signed-off-by: Sui Jingfeng 
Signed-off-by: suijingfeng <15330273...@189.cn>
---
 drivers/gpu/drm/Kconfig |   2 +
 drivers/gpu/drm/Makefile|   1 +
 drivers/gpu/drm/lsdc/Kconfig|  16 +
 drivers/gpu/drm/lsdc/Makefile   |  15 +
 drivers/gpu/drm/lsdc/lsdc_crtc.c| 376 ++
 drivers/gpu/drm/lsdc/lsdc_debugfs.c | 233 
 drivers/gpu/drm/lsdc/lsdc_drv.c | 569 
 drivers/gpu/drm/lsdc/lsdc_drv.h | 377 ++
 drivers/gpu/drm/lsdc/lsdc_i2c.c | 193 ++
 drivers/gpu/drm/lsdc/lsdc_irq.c |  85 +
 drivers/gpu/drm/lsdc/lsdc_output.c  | 465 +++
 drivers/gpu/drm/lsdc/lsdc_plane.c   | 440 +
 drivers/gpu/drm/lsdc/lsdc_pll.c | 468 +++
 drivers/gpu/drm/lsdc/lsdc_pll.h |  78 
 drivers/gpu/drm/lsdc/lsdc_probe.c   |  74 
 drivers/gpu/drm/lsdc/lsdc_regs.h| 343 +
 drivers/gpu/drm/lsdc/lsdc_ttm.c | 450 ++
 drivers/gpu/drm/lsdc/lsdc_ttm.h |  62 +++
 18 files changed, 4247 insertions(+)
 create mode 100644 drivers/gpu/drm/lsdc/Kconfig
 create mode 100644 drivers/gpu/drm/lsdc/Makefile
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_crtc.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_debugfs.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_drv.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_drv.h
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_i2c.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_irq.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_output.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_plane.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_pll.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_pll.h
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_probe.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_regs.h
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_ttm.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_ttm.h

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index dc0f94f02a82..3baecd48930b 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -367,6 +367,8 @@ source "drivers/gpu/drm/solomon/Kconfig"
 
 source "drivers/gpu/drm/sprd/Kconfig"
 
+source "drivers/gpu/drm/lsdc/Kconfig"
+
 config DRM_HYPERV
tristate "DRM Support for Hyper-V synthetic video device"
depends on DRM && PCI && MMU && HYPERV
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ab4460fcd63f..5a8e2fc8dd5d 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -190,3 +190,4 @@ obj-y   += gud/
 obj-$(CONFIG_DRM_HYPERV) += hyperv/
 obj-y  += solomon/
 obj-$(CONFIG_DRM_SPRD) += sprd/
+obj-$(CONFIG_DRM_LSDC) += lsdc/
diff --git a/drivers/gpu/drm/lsdc/Kconfig b/drivers/gpu/drm/lsdc/Kconfig
new file mode 100644
index ..437bb11dcd57
--- /dev/null
+++ b/drivers/gpu/drm/lsdc/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config DRM_LSDC
+   tristate "DRM support for Loongson Display 

Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Tvrtko Ursulin



On 20/02/2023 15:52, Rob Clark wrote:

On Mon, Feb 20, 2023 at 3:33 AM Tvrtko Ursulin
 wrote:



On 17/02/2023 20:45, Rodrigo Vivi wrote:


[snip]


Yeah I agree. And as not all media use cases are the same, as are not
all compute contexts someone somewhere will need to run a series of
workloads for power and performance numbers. Ideally that someone would
be the entity for which it makes sense to look at all use cases, from
server room to client, 3d, media and compute for both. If we could get
the capability to run this in some automated fashion, akin to CI, we
would even have a chance to keep making good decisions in the future.

Or we do some one off testing for this instance, but we still need a
range of workloads and parts to do it properly..


I also think the "arms race" scenario isn't really as much of a
problem as you think.  There aren't _that_ many things using the GPU
at the same time (compared to # of things using CPU).   And a lot of
mobile games throttle framerate to avoid draining your battery too
quickly (after all, if your battery is dead you can't keep buying loot
boxes or whatever).


Very good point.


On this one I still disagree from the point of view that it does not
make it good uapi if we allow everyone to select themselves for priority
handling (one flavour or the other).


There is plenty of precedent for userspace giving hints to the kernel
about scheduling and freq mgmt.  Like schedutil uclamp stuff.
Although I think that is all based on cgroups.


I knew about SCHED_DEADLINE and that it requires CAP_SYS_NICE, but I did 
not know about uclamp. Quick experiment with uclampset suggests it 
indeed does not require elevated privilege. If that is indeed so, it is 
good enough for me as a precedent.


It appears to work using sched_setscheduler so maybe could define 
something similar in i915/xe, per context or per client, not sure.


Maybe it would start as a primitive implementation but the uapi would 
not preclude making it smart(er) afterwards. Or passing along to GuC to 
do its thing with it.



In the fence/syncobj case, I think we need per-wait hints.. because
for a single process the driver will be doing both housekeeping waits
and potentially urgent waits.  There may also be some room for some
cgroup or similar knobs to control things like what max priority an
app can ask for, and whether or how aggressively the kernel responds
to the "deadline" hints.  So as far as "arms race", I don't think I'd


Per wait hints are okay I guess even with "I am important" in their name 
if sched_setscheduler allows raising uclamp.min just like that. In which 
case cgroup limits to mimic cpu uclamp also make sense.



change anything about my "fence deadline" proposal.. but that it might
just be one piece of the overall puzzle.


That SCHED_DEADLINE requires CAP_SYS_NICE does not worry you?

Regards,

Tvrtko


[PATCH v3] drm: add kms driver for loongson display controller

2023-02-20 Thread suijingfeng
From: suijingfeng 

The Loongson display controller IP has been integrated in both Loongson
North Bridge chipsets (LS7A1000 and LS7A2000) and Loongson SoCs (LS2K1000,
LS2K2000 etc); it has even been included in Loongson BMC products.

The display controller is a PCI device with two display pipes.
For the DC in LS7A1000 and LS2K1000, each pipe has a DVO output interface
which provides RGB888 signals, vertical & horizontal synchronisation,
and the pixel clock. Each CRTC is able to support 1920x1080@60Hz;
the maximum resolution is 2048x2048 according to the hardware spec.

For the DC in LS7A2000, each display pipe is equipped with a built-in
HDMI encoder which is compliant with the HDMI 1.4 specification, so it
supports 3840x2160@30Hz. The first display pipe is also equipped with
a transparent VGA encoder which is parallel with the HDMI encoder.
The DC in LS7A2000 is more complete; besides the above features, it has
two hardware cursors, two hardware vblank counters and two scanout
position recorders.

 v1 -> v2:
  1) Use hpd status reg when polling for ls7a2000
  2) Fix all warnings that emerged when compiling with W=1

 v2 -> v3:
  1) Add COMPILE_TEST in Kconfig and make the driver off by default
  2) Alphabetical sorting headers
  3) Untangle register access functions as much as possible
  4) Switch to TTM based memory manager and prefer cached mapping
 for Loongson SoC
  5) Add chip id detection method, now all models are distinguishable.
  6) Revise built-in HDMI phy driver; nearly all mainstream modes
     below 4K@30Hz are tested, and this driver supports these modes
     very well.

  As a basic KMS 2D driver, the user experience is good enough when
  using the X server under the MATE and Xfce desktop environments. This
  driver is ready to be merged, and I will take responsibility if any
  bugs happen.

Signed-off-by: Li Yi 
Signed-off-by: Sui Jingfeng 
Signed-off-by: suijingfeng <15330273...@189.cn>
---
 drivers/gpu/drm/Kconfig |   2 +
 drivers/gpu/drm/Makefile|   1 +
 drivers/gpu/drm/lsdc/Kconfig|  16 +
 drivers/gpu/drm/lsdc/Makefile   |  15 +
 drivers/gpu/drm/lsdc/lsdc_crtc.c| 376 ++
 drivers/gpu/drm/lsdc/lsdc_debugfs.c | 233 
 drivers/gpu/drm/lsdc/lsdc_drv.c | 569 
 drivers/gpu/drm/lsdc/lsdc_drv.h | 377 ++
 drivers/gpu/drm/lsdc/lsdc_i2c.c | 193 ++
 drivers/gpu/drm/lsdc/lsdc_irq.c |  85 +
 drivers/gpu/drm/lsdc/lsdc_output.c  | 465 +++
 drivers/gpu/drm/lsdc/lsdc_plane.c   | 440 +
 drivers/gpu/drm/lsdc/lsdc_pll.c | 468 +++
 drivers/gpu/drm/lsdc/lsdc_pll.h |  78 
 drivers/gpu/drm/lsdc/lsdc_probe.c   |  74 
 drivers/gpu/drm/lsdc/lsdc_regs.h| 343 +
 drivers/gpu/drm/lsdc/lsdc_ttm.c | 450 ++
 drivers/gpu/drm/lsdc/lsdc_ttm.h |  62 +++
 include/drm/drm_gem_vram_helper.h   |   4 -
 19 files changed, 4247 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/lsdc/Kconfig
 create mode 100644 drivers/gpu/drm/lsdc/Makefile
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_crtc.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_debugfs.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_drv.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_drv.h
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_i2c.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_irq.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_output.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_plane.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_pll.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_pll.h
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_probe.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_regs.h
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_ttm.c
 create mode 100644 drivers/gpu/drm/lsdc/lsdc_ttm.h

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index dc0f94f02a82..3baecd48930b 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -367,6 +367,8 @@ source "drivers/gpu/drm/solomon/Kconfig"
 
 source "drivers/gpu/drm/sprd/Kconfig"
 
+source "drivers/gpu/drm/lsdc/Kconfig"
+
 config DRM_HYPERV
tristate "DRM Support for Hyper-V synthetic video device"
depends on DRM && PCI && MMU && HYPERV
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index ab4460fcd63f..5a8e2fc8dd5d 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -190,3 +190,4 @@ obj-y   += gud/
 obj-$(CONFIG_DRM_HYPERV) += hyperv/
 obj-y  += solomon/
 obj-$(CONFIG_DRM_SPRD) += sprd/
+obj-$(CONFIG_DRM_LSDC) += lsdc/
diff --git a/drivers/gpu/drm/lsdc/Kconfig b/drivers/gpu/drm/lsdc/Kconfig
new file mode 100644
index ..437bb11dcd57
--- /dev/null
+++ b/drivers/gpu/drm/lsdc/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config DRM_LSDC
+   tristate "DRM support for Loongson Display Controller"
+   depends on DRM 

Re: [PATCH v4 09/14] drm/syncobj: Add deadline support for syncobj waits

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 1:05 AM Pekka Paalanen  wrote:
>
> On Sat, 18 Feb 2023 13:15:52 -0800
> Rob Clark  wrote:
>
> > From: Rob Clark 
> >
> > Add a new flag to let userspace provide a deadline as a hint for syncobj
> > and timeline waits.  This gives a hint to the driver signaling the
> > backing fences about how soon userspace needs it to complete work, so it
> > can adjust GPU frequency accordingly.  An immediate deadline can be
> > given to provide something equivalent to i915 "wait boost".
> >
> > Signed-off-by: Rob Clark 
> > ---
> >
> > I'm a bit on the fence about the addition of the DRM_CAP, but it seems
> > useful to give userspace a way to probe whether the kernel and driver
> > supports the new wait flag, especially since we have vk-common code
> > dealing with syncobjs.  But open to suggestions.
> >
> >  drivers/gpu/drm/drm_ioctl.c   |  3 ++
> >  drivers/gpu/drm/drm_syncobj.c | 59 ---
> >  include/drm/drm_drv.h |  6 
> >  include/uapi/drm/drm.h| 16 --
> >  4 files changed, 71 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> > index 7c9d66ee917d..1c5c942cf0f9 100644
> > --- a/drivers/gpu/drm/drm_ioctl.c
> > +++ b/drivers/gpu/drm/drm_ioctl.c
> > @@ -254,6 +254,9 @@ static int drm_getcap(struct drm_device *dev, void 
> > *data, struct drm_file *file_
> >   case DRM_CAP_SYNCOBJ_TIMELINE:
> >   req->value = drm_core_check_feature(dev, 
> > DRIVER_SYNCOBJ_TIMELINE);
> >   return 0;
> > + case DRM_CAP_SYNCOBJ_DEADLINE:
> > + req->value = drm_core_check_feature(dev, 
> > DRIVER_SYNCOBJ_TIMELINE);
>
> Hi,
>
> is that a typo for DRIVER_SYNCOBJ_DEADLINE?

Ahh, yes, that is a typo.. but I'm thinking of dropping the cap and
allowing count_handles==0 instead as a way for userspace to probe
whether the kernel supports the new ioctl flag/fields.

> > + return 0;
> >   }
> >
> >   /* Other caps only work with KMS drivers */
> > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> > index 0c2be8360525..61cf97972a60 100644
> > --- a/drivers/gpu/drm/drm_syncobj.c
> > +++ b/drivers/gpu/drm/drm_syncobj.c
> > @@ -973,7 +973,8 @@ static signed long 
> > drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
> > uint32_t count,
> > uint32_t flags,
> > signed long timeout,
> > -   uint32_t *idx)
> > +   uint32_t *idx,
> > +   ktime_t *deadline)
> >  {
> >   struct syncobj_wait_entry *entries;
> >   struct dma_fence *fence;
> > @@ -1053,6 +1054,15 @@ static signed long 
> > drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
> >   drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
> >   }
> >
> > + if (deadline) {
> > + for (i = 0; i < count; ++i) {
> > + fence = entries[i].fence;
> > + if (!fence)
> > + continue;
> > + dma_fence_set_deadline(fence, *deadline);
> > + }
> > + }
> > +
> >   do {
> >   set_current_state(TASK_INTERRUPTIBLE);
> >
> > @@ -1151,7 +1161,8 @@ static int drm_syncobj_array_wait(struct drm_device 
> > *dev,
> > struct drm_file *file_private,
> > struct drm_syncobj_wait *wait,
> > struct drm_syncobj_timeline_wait 
> > *timeline_wait,
> > -   struct drm_syncobj **syncobjs, bool 
> > timeline)
> > +   struct drm_syncobj **syncobjs, bool 
> > timeline,
> > +   ktime_t *deadline)
> >  {
> >   signed long timeout = 0;
> >   uint32_t first = ~0;
> > @@ -1162,7 +1173,8 @@ static int drm_syncobj_array_wait(struct drm_device 
> > *dev,
> >NULL,
> >wait->count_handles,
> >wait->flags,
> > -  timeout, &first);
> > +  timeout, &first,
> > +  deadline);
> >   if (timeout < 0)
> >   return timeout;
> >   wait->first_signaled = first;
> > @@ -1172,7 +1184,8 @@ static int drm_syncobj_array_wait(struct drm_device 
> > *dev,
> >
> > u64_to_user_ptr(timeline_wait->points),
> >
> > timeline_wait->count_handles,
> > 

RE: [PATCH 08/27] habanalabs: add info when FD released while device still in use

2023-02-20 Thread Tomer Tayar
On Thu, Feb 20, 2023 at 17:55 Stanislaw Gruszka 
 wrote:
> On Fri, Feb 17, 2023 at 11:34:39AM +, Tomer Tayar wrote:
>  >
> > > Ok, just replace compose_device_in_use_info() with snprintf().
> > > I don't think you need custom implementation of snprintf().
> >
> > compose_device_in_use_info() was added to handle in a single place the
> > snprintf() return value and the buffer pointer moving.
> > However, you are correct and it is too much here, as the local buffer
> > size is set with a value that should be enough for max possible print.
> > We will remove compose_device_in_use_info() and use snprintf() directly.
> 
> Actually the safer version would be scnprintf() since for that function
> return value could not be bigger than passed len. Usage then could be
> as simple as:
> 
> n += scnprintf(buf + n, len - n, ...);
> n += scnprintf(buf + n, len - n, ...);
> 
> Regards
> Stanislaw

Sure, we will use it, thanks! 
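
Spelled out a bit more, the pattern stays safe even once the buffer fills
up, because scnprintf() never returns more than the space that was left
(the function name and message contents below are illustrative):

#include <linux/kernel.h>

static void device_in_use_info(char *buf, size_t len,
                               int exported_bufs, int open_ctxs)
{
        int n = 0;

        n += scnprintf(buf + n, len - n, "%d dma-buf objects exported, ",
                       exported_bufs);
        n += scnprintf(buf + n, len - n, "%d contexts still open",
                       open_ctxs);
        /* once the buffer is full, further calls simply write nothing */
}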


Re: [PATCH v4 06/14] dma-buf/sync_file: Support (E)POLLPRI

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 12:53 AM Pekka Paalanen  wrote:
>
> On Sat, 18 Feb 2023 13:15:49 -0800
> Rob Clark  wrote:
>
> > From: Rob Clark 
> >
> > Allow userspace to use the EPOLLPRI/POLLPRI flag to indicate an urgent
> > wait (as opposed to a "housekeeping" wait to know when to cleanup after
> > some work has completed).  Usermode components of GPU driver stacks
> > often poll() on fence fd's to know when it is safe to do things like
> > free or reuse a buffer, but they can also poll() on a fence fd when
> > waiting to read back results from the GPU.  The EPOLLPRI/POLLPRI flag
> > lets the kernel differentiate these two cases.
> >
> > Signed-off-by: Rob Clark 
>
> Hi,
>
> where would the UAPI documentation of this go?
> It seems to be missing.

Good question, I am not sure.  The poll() man page has a description,
but my usage doesn't fit that _exactly_ (but OTOH the description is a
bit vague).

> If a Wayland compositor is polling application fences to know which
> client buffer to use in its rendering, should the compositor poll with
> PRI or not? If a compositor polls with PRI, then all fences from all
> applications would always be PRI. Would that be harmful somehow or
> would it be beneficial?

I think a compositor would rather use the deadline ioctl and then poll
without PRI.  Otherwise you are giving an urgency signal to the fence
signaller which might not necessarily be needed.

The places where I expect PRI to be useful is more in mesa (things
like glFinish(), readpix, and other similar sorts of blocking APIs)

BR,
-R

>
>
> Thanks,
> pq
>
> > ---
> >  drivers/dma-buf/sync_file.c | 8 
> >  1 file changed, 8 insertions(+)
> >
> > diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
> > index fb6ca1032885..c30b2085ee0a 100644
> > --- a/drivers/dma-buf/sync_file.c
> > +++ b/drivers/dma-buf/sync_file.c
> > @@ -192,6 +192,14 @@ static __poll_t sync_file_poll(struct file *file, 
> > poll_table *wait)
> >  {
> >   struct sync_file *sync_file = file->private_data;
> >
> > + /*
> > +  * The POLLPRI/EPOLLPRI flag can be used to signal that
> > +  * userspace wants the fence to signal ASAP, express this
> > +  * as an immediate deadline.
> > +  */
> > + if (poll_requested_events(wait) & EPOLLPRI)
> > + dma_fence_set_deadline(sync_file->fence, ktime_get());
> > +
> >   poll_wait(file, &sync_file->wq, wait);
> >
> >   if (list_empty(&sync_file->cb.node) &&
>
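
A minimal sketch of the mesa-style urgent wait described above, using
nothing beyond standard poll(2) (the helper name is illustrative):

#include <poll.h>

static int wait_fence_urgent(int fence_fd, int timeout_ms)
{
        struct pollfd pfd = {
                .fd = fence_fd,
                /* POLLPRI marks the wait as urgent; POLLIN still waits
                 * for the fence to signal as usual */
                .events = POLLPRI | POLLIN,
        };

        return poll(&pfd, 1, timeout_ms);
}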


Re: [PATCH 1/2] drm: document DRM_IOCTL_PRIME_HANDLE_TO_FD and PRIME_FD_TO_HANDLE

2023-02-20 Thread Dave Stevenson
On Mon, 20 Feb 2023 at 15:57, Simon Ser  wrote:
>
> On Monday, February 20th, 2023 at 16:49, Dave Stevenson 
>  wrote:
>
> > > + * User-space sets _prime_handle.fd with a DMA-BUF file descriptor to
> > > + * import, and gets back a GEM handle in _prime_handle.handle.
> > > + * _prime_handle.flags is unused.
> >
> > Is it worth explicitly stating that the handle would be released via
> > DRM_IOCTL_GEM_CLOSE? I've had userspace developers query how to
> > release imported handles in the past.
>
> v2 spells this out I think.

It does - thanks.
I was reading back through my emails from Friday and the weekend, and
hadn't noticed v2 :-/ Sorry for the noise.

  Dave
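
For completeness, the import-then-release flow being documented looks
roughly like this with the raw ioctls (a sketch, error handling trimmed):

#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static int import_and_release(int drm_fd, int dmabuf_fd)
{
        struct drm_prime_handle prime;
        struct drm_gem_close close_args;

        memset(&prime, 0, sizeof(prime));
        prime.fd = dmabuf_fd;
        if (ioctl(drm_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &prime))
                return -1;

        /* ... use prime.handle with GEM ioctls ... */

        /* imported handles are released with GEM_CLOSE, not "unimport" */
        memset(&close_args, 0, sizeof(close_args));
        close_args.handle = prime.handle;
        return ioctl(drm_fd, DRM_IOCTL_GEM_CLOSE, &close_args);
}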


Re: [PATCH v4 05/14] dma-buf/sync_file: Add SET_DEADLINE ioctl

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 12:27 AM Christian König
 wrote:
>
> Am 18.02.23 um 22:15 schrieb Rob Clark:
> > From: Rob Clark 
> >
> > The initial purpose is for igt tests, but this would also be useful for
> > compositors that wait until close to vblank deadline to make decisions
> > about which frame to show.
> >
> > The igt tests can be found at:
> >
> > https://gitlab.freedesktop.org/robclark/igt-gpu-tools/-/commits/fence-deadline
> >
> > v2: Clarify the timebase, add link to igt tests
> >
> > Signed-off-by: Rob Clark 
> > ---
> >   drivers/dma-buf/sync_file.c| 19 +++
> >   include/uapi/linux/sync_file.h | 22 ++
> >   2 files changed, 41 insertions(+)
> >
> > diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
> > index af57799c86ce..fb6ca1032885 100644
> > --- a/drivers/dma-buf/sync_file.c
> > +++ b/drivers/dma-buf/sync_file.c
> > @@ -350,6 +350,22 @@ static long sync_file_ioctl_fence_info(struct 
> > sync_file *sync_file,
> >   return ret;
> >   }
> >
> > +static int sync_file_ioctl_set_deadline(struct sync_file *sync_file,
> > + unsigned long arg)
> > +{
> > + struct sync_set_deadline ts;
> > +
> > + if (copy_from_user(&ts, (void __user *)arg, sizeof(ts)))
> > + return -EFAULT;
> > +
> > + if (ts.pad)
> > + return -EINVAL;
> > +
> > + dma_fence_set_deadline(sync_file->fence, ktime_set(ts.tv_sec, 
> > ts.tv_nsec));
> > +
> > + return 0;
> > +}
> > +
> >   static long sync_file_ioctl(struct file *file, unsigned int cmd,
> >   unsigned long arg)
> >   {
> > @@ -362,6 +378,9 @@ static long sync_file_ioctl(struct file *file, unsigned 
> > int cmd,
> >   case SYNC_IOC_FILE_INFO:
> >   return sync_file_ioctl_fence_info(sync_file, arg);
> >
> > + case SYNC_IOC_SET_DEADLINE:
> > + return sync_file_ioctl_set_deadline(sync_file, arg);
> > +
> >   default:
> >   return -ENOTTY;
> >   }
> > diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h
> > index ee2dcfb3d660..c8666580816f 100644
> > --- a/include/uapi/linux/sync_file.h
> > +++ b/include/uapi/linux/sync_file.h
> > @@ -67,6 +67,20 @@ struct sync_file_info {
> >   __u64   sync_fence_info;
> >   };
> >
> > +/**
> > + * struct sync_set_deadline - set a deadline on a fence
> > + * @tv_sec:  seconds elapsed since epoch
> > + * @tv_nsec: nanoseconds elapsed since the time given by the tv_sec
> > + * @pad: must be zero
> > + *
> > + * The timebase for the deadline is CLOCK_MONOTONIC (same as vblank)
> > + */
> > +struct sync_set_deadline {
> > + __s64   tv_sec;
> > + __s32   tv_nsec;
> > + __u32   pad;
>
> IIRC struct timespec defined this as time_t/long (which is horrible for
> an UAPI because of the sizeof(long) dependency), one possible
> alternative is to use 64bit nanoseconds from CLOCK_MONOTONIC (which is
> essentially ktime).
>
> Not 100% sure if there is any preferences documented, but I think the
> later might be better.

The original thought is that this maps directly to clock_gettime()
without extra conversion needed, and is similar to other pre-ktime_t
UAPI.  But OTOH if userspace wants to add an offset, it is maybe
better to convert completely to ns in userspace and use a u64 (as that
is what ns_to_ktime() uses).. (and OFC whatever decision here also
applies to the syncobj wait ioctls)

I'm leaning towards u64 CLOCK_MONOTONIC ns if no one has a good
argument against that.

BR,
-R

> Either way the patch is Acked-by: Christian König
>  for this patch.
>
> Regards,
> Christian.
>
> > +};
> > +
> >   #define SYNC_IOC_MAGIC  '>'
> >
> >   /**
> > @@ -95,4 +109,12 @@ struct sync_file_info {
> >*/
> >   #define SYNC_IOC_FILE_INFO  _IOWR(SYNC_IOC_MAGIC, 4, struct 
> > sync_file_info)
> >
> > +
> > +/**
> > + * DOC: SYNC_IOC_SET_DEADLINE - set a deadline on a fence
> > + *
> > + * Allows userspace to set a deadline on a fence, see 
> > dma_fence_set_deadline()
> > + */
> > +#define SYNC_IOC_SET_DEADLINE_IOW(SYNC_IOC_MAGIC, 5, struct 
> > sync_set_deadline)
> > +
> >   #endif /* _UAPI_LINUX_SYNC_H */
>


Re: [PATCH 1/2] drm: document DRM_IOCTL_PRIME_HANDLE_TO_FD and PRIME_FD_TO_HANDLE

2023-02-20 Thread Simon Ser
On Monday, February 20th, 2023 at 16:49, Dave Stevenson 
 wrote:

> > + * User-space sets _prime_handle.fd with a DMA-BUF file descriptor to
> > + * import, and gets back a GEM handle in _prime_handle.handle.
> > + * _prime_handle.flags is unused.
> 
> Is it worth explicitly stating that the handle would be released via
> DRM_IOCTL_GEM_CLOSE? I've had userspace developers query how to
> release imported handles in the past.

v2 spells this out I think.


Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Tvrtko Ursulin



On 20/02/2023 15:45, Rob Clark wrote:

On Mon, Feb 20, 2023 at 4:22 AM Tvrtko Ursulin
 wrote:



On 17/02/2023 17:00, Rob Clark wrote:

On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin
 wrote:


[snip]


adapted from your patches..  I think the basic idea of deadlines
(which includes "I want it NOW" ;-)) isn't controversial, but the
original idea got caught up in some bikeshed (what about compositors
that wait on fences in userspace to decide which surfaces to update in
the next frame), plus me getting busy and generally not having a good
plan for how to leverage this from VM guests (which is becoming
increasingly important for CrOS).  I think I can build on some ongoing
virtgpu fencing improvement work to solve the latter.  But now that we
have a 2nd use-case for this, it makes sense to respin.


Sure, I was looking at the old version already. It is interesting. But
also IMO needs quite a bit more work to approach achieving what is
implied from the name of the feature. It would need proper deadline
based sched job picking, and even then drm sched is mostly just a
frontend. So once past runnable status and jobs handed over to backend,
without further driver work it probably wouldn't be very effective past
very lightly loaded systems.


Yes, but all of that is not part of dma_fence ;-)


:) Okay.

Having said that, do we need a step back to think about whether adding
deadline to dma-fences is not making them something too much different
to what they were? Going from purely synchronisation primitive more
towards scheduling paradigms. Just to brainstorm if there will not be
any unintended consequences. I should mention this in your RFC thread
actually.


Perhaps "deadline" isn't quite the right name, but I haven't thought
of anything better.  It is really a hint to the fence signaller about
how soon it is interested in a result so the driver can factor that
into freq scaling decisions.  Maybe "goal" or some other term would be
better?


Don't know, no strong opinion on the name at the moment. For me it was 
more about the change of what type of side channel data is getting 
attached to dma-fence and whether it changes what the primitive is for.



I guess that can factor into scheduling decisions as well.. but we
already have priority for that.  My main interest is freq mgmt.

(Thankfully we don't have performance and efficiency cores to worry
about, like CPUs ;-))


A pretty common challenging usecase is still the single fullscreen
game, where scheduling isn't the problem, but landing at an
appropriate GPU freq absolutely is.  (UI workloads are perhaps more
interesting from a scheduler standpoint, but they generally aren't
challenging from a load/freq standpoint.)


Challenging as in picking the right operating point? Latency (and so
user-perceived UI smoothness) might be impacted due to missing waitboost
for anything syncobj related. I don't know if anything to measure that
exists currently though. Assuming it is measurable, the question would
then be whether it is perceivable.

Fwiw, the original motivation of the series was to implement something
akin to i915 pageflip boosting without having to abandon the atomic
helpers.  (And, I guess it would also let i915 preserve that feature
if it switched to atomic helpers.. I'm unsure if there are still other
things blocking i915's migration.)


Question for display folks I guess.


Then if we fast forward to a world where schedulers perhaps become fully
deadline aware (we even had this for i915 few years back) then the
question will be does equating waits with immediate deadlines still
works. Maybe not too well because we wouldn't have the ability to
distinguish between the "someone is waiting" signal from the otherwise
propagated deadlines.


Is there any other way to handle a wait boost than expressing it as an
ASAP deadline?


A leading question or just a question? Nothing springs to my mind at the
moment.


Just a question.  The immediate deadline is the only thing that makes
sense to me, but that could be because I'm looking at it from the
perspective of also trying to handle the case where missing vblank
reduces utilization and provides the wrong signal to gpufreq.. i915
already has a way to handle this internally, but it involves bypassing
the atomic helpers, which isn't a thing I want to encourage other
drivers to do.  And completely doesn't work for situations where the
gpu and display are separate devices.


Right, there is yet another angle to discuss with Daniel here who AFAIR 
was a bit against i915 priority inheritance going past a single device 
instance. In which case DRI_PRIME=1 would lose the ability to boost 
frame buffer dependency chains. Opens up the question of deadline 
inheritance across different drivers too. Or perhaps Daniel would be 
okay with this working if implemented at the dma-fence layer.


Regards,

Tvrtko


Re: [PATCH v4 10/14] drm/vblank: Add helper to get next vblank time

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 1:08 AM Pekka Paalanen  wrote:
>
> On Sat, 18 Feb 2023 13:15:53 -0800
> Rob Clark  wrote:
>
> > From: Rob Clark 
> >
> > Will be used in the next commit to set a deadline on fences that an
> > atomic update is waiting on.
> >
> > Signed-off-by: Rob Clark 
> > ---
> >  drivers/gpu/drm/drm_vblank.c | 32 
> >  include/drm/drm_vblank.h |  1 +
> >  2 files changed, 33 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
> > index 2ff31717a3de..caf25ebb34c5 100644
> > --- a/drivers/gpu/drm/drm_vblank.c
> > +++ b/drivers/gpu/drm/drm_vblank.c
> > @@ -980,6 +980,38 @@ u64 drm_crtc_vblank_count_and_time(struct drm_crtc 
> > *crtc,
> >  }
> >  EXPORT_SYMBOL(drm_crtc_vblank_count_and_time);
> >
> > +/**
> > + * drm_crtc_next_vblank_time - calculate the time of the next vblank
> > + * @crtc: the crtc for which to calculate next vblank time
> > + * @vblanktime: pointer to time to receive the next vblank timestamp.
> > + *
> > + * Calculate the expected time of the next vblank based on time of previous
> > + * vblank and frame duration
>
> Hi,
>
> for VRR this targets the highest frame rate possible for the current
> VRR mode, right?
>

It is based on vblank->framedur_ns which is in turn based on
mode->crtc_clock.  Presumably for VRR that ends up being a maximum?

BR,
-R


>
> Thanks,
> pq
>
> > + */
> > +int drm_crtc_next_vblank_time(struct drm_crtc *crtc, ktime_t *vblanktime)
> > +{
> > + unsigned int pipe = drm_crtc_index(crtc);
> > + struct drm_vblank_crtc *vblank = &crtc->dev->vblank[pipe];
> > + unsigned int pipe = drm_crtc_index(crtc);
> > + u64 count;
> > +
> > + if (!vblank->framedur_ns)
> > + return -EINVAL;
> > +
> > + count = drm_vblank_count_and_time(crtc->dev, pipe, vblanktime);
> > +
> > + /*
> > +  * If we don't get a valid count, then we probably also don't
> > +  * have a valid time:
> > +  */
> > + if (!count)
> > + return -EINVAL;
> > +
> > + *vblanktime = ktime_add(*vblanktime, 
> > ns_to_ktime(vblank->framedur_ns));
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(drm_crtc_next_vblank_time);
> > +
> >  static void send_vblank_event(struct drm_device *dev,
> >   struct drm_pending_vblank_event *e,
> >   u64 seq, ktime_t now)
> > diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h
> > index 733a3e2d1d10..a63bc2c92f3c 100644
> > --- a/include/drm/drm_vblank.h
> > +++ b/include/drm/drm_vblank.h
> > @@ -230,6 +230,7 @@ bool drm_dev_has_vblank(const struct drm_device *dev);
> >  u64 drm_crtc_vblank_count(struct drm_crtc *crtc);
> >  u64 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc,
> >  ktime_t *vblanktime);
> > +int drm_crtc_next_vblank_time(struct drm_crtc *crtc, ktime_t *vblanktime);
> >  void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
> >  struct drm_pending_vblank_event *e);
> >  void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
>
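
For context, a minimal sketch of the intended use from the cover text
("set a deadline on fences that an atomic update is waiting on"); the
function name and call site are illustrative, and dma_fence_set_deadline()
is from the same in-flight series, not a merged API:

	static void boost_fence_for_flip(struct drm_crtc *crtc,
					 struct dma_fence *fence)
	{
		ktime_t vbltime;

		if (drm_crtc_next_vblank_time(crtc, &vbltime) == 0)
			dma_fence_set_deadline(fence, vbltime);
	}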


Re: [PATCH 08/27] habanalabs: add info when FD released while device still in use

2023-02-20 Thread Stanislaw Gruszka
On Fri, Feb 17, 2023 at 11:34:39AM +0000, Tomer Tayar wrote:
 > 
> > Ok, just replace compose_device_in_use_info() with snprintf().
> > I don't think you need custom implementation of snprintf().
> 
> compose_device_in_use_info() was added to handle the snprintf() return
> value and the advancing buffer pointer in a single place.
> However, you are correct and it is too much here, as the local buffer
> size is set to a value that should be enough for the largest possible
> print.
> We will remove compose_device_in_use_info() and use snprintf() directly.

Actually the safer version would be scnprintf(), since for that function
the return value cannot be bigger than the passed len. Usage then could
be as simple as:

n += scnprintf(buf + n, len - n, ...);
n += scnprintf(buf + n, len - n, ...);
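
For illustration, a sketch of the failure mode this avoids (hypothetical
buffer and format strings, not the habanalabs code):

	/*
	 * snprintf() returns the length the output *would* have had, so
	 * after truncation "n" can exceed "len" and "len - n" is bogus
	 * on the next call.  scnprintf() returns the number of
	 * characters actually written, so the running offset can never
	 * pass the end of the buffer.
	 */
	static void fill_status(char *buf, size_t len)
	{
		int n = 0;

		n += scnprintf(buf + n, len - n, "device in use: ");
		n += scnprintf(buf + n, len - n, "%u contexts", 3);
	}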

Regards
Stanislaw



Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 3:33 AM Tvrtko Ursulin
 wrote:
>
>
> On 17/02/2023 20:45, Rodrigo Vivi wrote:
> > On Fri, Feb 17, 2023 at 09:00:49AM -0800, Rob Clark wrote:
> >> On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin
> >>  wrote:
> >>>
> >>>
> >>> On 17/02/2023 14:55, Rob Clark wrote:
>  On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin
>   wrote:
> >
> >
> > On 16/02/2023 18:19, Rodrigo Vivi wrote:
> >> On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote:
> >>> On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin
> >>>  wrote:
> 
>  From: Tvrtko Ursulin 
> 
>  In i915 we have this concept of "wait boosting" where we give a 
>  priority boost
>  for instance to fences which are actively waited upon from 
>  userspace. This has
>  its pros and cons and can certainly be discussed at length. However 
>  the fact is
>  some workloads really like it.
> 
>  Problem is that with the arrival of drm syncobj and a new userspace 
>  waiting
>  entry point it added, the waitboost mechanism was bypassed. Hence I 
>  cooked up
>  this mini series really (really) quickly to see if some discussion 
>  can be had.
> 
>  It adds a concept of "wait count" to dma fence, which is incremented 
>  for every
>  explicit dma_fence_enable_sw_signaling and 
>  dma_fence_add_wait_callback (like
>  dma_fence_add_callback but from explicit/userspace wait paths).
> >>>
> >>> I was thinking about a similar thing, but in the context of dma_fence
> >>> (or rather sync_file) fd poll()ing.  How does the kernel differentiate
> >>> between "housekeeping" poll()ers that don't want to trigger boost but
> >>> simply know when to do cleanup, and waiters who are waiting with some
> >>> urgency.  I think we could use EPOLLPRI for this purpose.
> >>>
> >>> Not sure how that translates to waits via the syncobj.  But I think we
> >>> want to let userspace give some hint about urgent vs housekeeping
> >>> waits.
> >>
> >> Should the hint be on the waits, or should the hints be on the executed
> >> context?
> >>
> >> In the end we need some way to quickly ramp-up the frequency to avoid
> >> the execution bubbles.
> >>
> >> waitboost is trying to guess that, but in some cases it guess wrong
> >> and waste power.
> >
> > Do we have a list of workloads which shows who benefits and who loses
> > from the current implementation of waitboost?
> >> btw, this is something that other drivers might need:
> >>
> >> https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883
> >> Cc: Alex Deucher 
> >
> > I have several issues with the context hint if it would directly
> > influence frequency selection in the "more power" direction.
> >
> > First of all, assume a context hint would replace the waitboost. Which
> > applications would need to set it to restore the lost performance and
> > how would they set it?
> >
> > Then I don't even think userspace necessarily knows. Think of a layer
> > like OpenCL. It doesn't really know in advance the profile of
> > submissions vs waits. It depends on the CPU vs GPU speed, so hardware
> > generation, and the actual size of the workload which can be influenced
> > by the application (or user) and not the library.
> >
> > The approach also lends itself well for the "arms race" where every
> > application can say "Me me me, I am the most important workload there 
> > is!".
> 
>  since there is discussion happening in two places:
> 
>  https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433
> 
>  What I think you might want is a ctx boost_mask which lets an app or
>  driver disable certain boost signals/classes.  Where fence waits is
>  one class of boost, but hypothetical other signals like touchscreen
>  (or other) input events could be another class of boost.  A compute
>  workload might be interested in fence wait boosts but couldn't care less
>  about input events.
> >>>
> >>> I think it can only be apps which could have any chance of knowing whether
> >>> their use of a library is latency sensitive or not. Which means new
> >>> library extensions and their adoption. So I have some strong reservation
> >>> that route is feasible.
> >>>
> >>> Or we tie it with priority, which many drivers do. Normal and above gets the
> >>> boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH).
> >>
> >> yeah, that sounds reasonable.
> >>
> >
> > on that gitlab-issue discussion Emma Anholt was against using the priority
> > to influence frequency since that should be more about latency.
> >
> > or we are talking about something different priority here?
>
> As Rob already explained - I was suggesting 

Re: [PATCH 1/2] drm: document DRM_IOCTL_PRIME_HANDLE_TO_FD and PRIME_FD_TO_HANDLE

2023-02-20 Thread Dave Stevenson
Hi Simon

On Thu, 16 Feb 2023 at 13:09, Simon Ser  wrote:
>
> Signed-off-by: Simon Ser 
> Cc: Daniel Vetter 
> Cc: Pekka Paalanen 
> Cc: Daniel Stone 
> ---
>  include/uapi/drm/drm.h | 17 +
>  1 file changed, 17 insertions(+)
>
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 4cb956a52aee..54b2313c8332 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -1012,7 +1012,24 @@ extern "C" {
>  #define DRM_IOCTL_UNLOCK   DRM_IOW( 0x2b, struct drm_lock)
>  #define DRM_IOCTL_FINISH   DRM_IOW( 0x2c, struct drm_lock)
>
> +/**
> + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD.
> + *
> + * User-space sets _prime_handle.handle with the GEM handle to export and
> + * _prime_handle.flags, and gets back a DMA-BUF file descriptor in
> + * _prime_handle.fd.
> + */
>  #define DRM_IOCTL_PRIME_HANDLE_TO_FDDRM_IOWR(0x2d, struct 
> drm_prime_handle)
> +/**
> + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle.
> + *
> + * User-space sets _prime_handle.fd with a DMA-BUF file descriptor to
> + * import, and gets back a GEM handle in _prime_handle.handle.
> + * _prime_handle.flags is unused.

Is it worth explicitly stating that the handle would be released via
DRM_IOCTL_GEM_CLOSE? I've had userspace developers query how to
release imported handles in the past.

  Dave

> + *
> + * If an existing GEM handle refers to the memory object backing the DMA-BUF,
> + * that GEM handle is returned.
> + */
>  #define DRM_IOCTL_PRIME_FD_TO_HANDLEDRM_IOWR(0x2e, struct 
> drm_prime_handle)
>
>  #define DRM_IOCTL_AGP_ACQUIRE  DRM_IO(  0x30)
> --
> 2.39.1
>
>
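
On the release question above, an illustrative userspace sketch (error
handling elided) of importing a dma-buf and later dropping the handle
with DRM_IOCTL_GEM_CLOSE:

	#include <sys/ioctl.h>
	#include <drm/drm.h>

	int import_then_release(int drm_fd, int dmabuf_fd)
	{
		struct drm_prime_handle prime = { .fd = dmabuf_fd };
		struct drm_gem_close gem_close = { 0 };

		if (ioctl(drm_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &prime) < 0)
			return -1;

		/* ... use prime.handle ... */

		gem_close.handle = prime.handle;
		return ioctl(drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
	}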


Re: [PATCH v4 14/14] drm/i915: Add deadline based boost support

2023-02-20 Thread Tvrtko Ursulin



On 18/02/2023 21:15, Rob Clark wrote:

From: Rob Clark 

Signed-off-by: Rob Clark 
---

This should probably be re-written by someone who knows the i915
request/timeline stuff better, to deal with non-immediate deadlines.
But as-is I think this should be enough to handle the case where
we want syncobj waits to trigger boost.


Yeah, there are endless possibilities. :) But I think it is effectively 
similar enough to current waitboosting (when waits are done using the 
i915 specific ioctl). So as a first step I'll try to organize some 
internal power and performance testing, at least Chromebook focused, to 
see if modern userspace (syncobj based) even benefits and does not by 
some chance regress across the board.


Regards,

Tvrtko



  drivers/gpu/drm/i915/i915_driver.c  |  2 +-
  drivers/gpu/drm/i915/i915_request.c | 20 
  2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_driver.c 
b/drivers/gpu/drm/i915/i915_driver.c
index cf1c0970ecb4..bd40b7bcb38a 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1781,7 +1781,7 @@ static const struct drm_driver i915_drm_driver = {
.driver_features =
DRIVER_GEM |
DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
-   DRIVER_SYNCOBJ_TIMELINE,
+   DRIVER_SYNCOBJ_TIMELINE | DRIVER_SYNCOBJ_DEADLINE,
.release = i915_driver_release,
.open = i915_driver_open,
.lastclose = i915_driver_lastclose,
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 7503dcb9043b..44491e7e214c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -97,6 +97,25 @@ static bool i915_fence_enable_signaling(struct dma_fence 
*fence)
return i915_request_enable_breadcrumb(to_request(fence));
  }
  
+static void i915_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+   struct i915_request *rq = to_request(fence);
+
+   if (i915_request_completed(rq))
+   return;
+
+   if (i915_request_started(rq))
+   return;
+
+   /*
+* TODO something more clever for deadlines that are in the
+* future.  I think probably track the nearest deadline in
+* rq->timeline and set timer to trigger boost accordingly?
+*/
+
+   intel_rps_boost(rq);
+}
+
  static signed long i915_fence_wait(struct dma_fence *fence,
   bool interruptible,
   signed long timeout)
@@ -182,6 +201,7 @@ const struct dma_fence_ops i915_fence_ops = {
.signaled = i915_fence_signaled,
.wait = i915_fence_wait,
.release = i915_fence_release,
+   .set_deadline = i915_fence_set_deadline,
  };
  
  static void irq_execute_cb(struct irq_work *wrk)


Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 4:22 AM Tvrtko Ursulin
 wrote:
>
>
> On 17/02/2023 17:00, Rob Clark wrote:
> > On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin
> >  wrote:
>
> [snip]
>
> >>> adapted from your patches..  I think the basic idea of deadlines
> >>> (which includes "I want it NOW" ;-)) isn't controversial, but the
> >>> original idea got caught up in some bikeshed (what about compositors
> >>> that wait on fences in userspace to decide which surfaces to update in
> >>> the next frame), plus me getting busy and generally not having a good
> >>> plan for how to leverage this from VM guests (which is becoming
> >>> increasingly important for CrOS).  I think I can build on some ongoing
> >>> virtgpu fencing improvement work to solve the latter.  But now that we
> >>> have a 2nd use-case for this, it makes sense to respin.
> >>
> >> Sure, I was looking at the old version already. It is interesting. But
> >> also IMO needs quite a bit more work to approach achieving what is
> >> implied from the name of the feature. It would need proper deadline
> >> based sched job picking, and even then drm sched is mostly just a
> >> frontend. So once past runnable status and jobs handed over to backend,
> >> without further driver work it probably wouldn't be very effective past
> >> very lightly loaded systems.
> >
> > Yes, but all of that is not part of dma_fence ;-)
>
> :) Okay.
>
> Having said that, do we need a step back to think about whether adding
> a deadline to dma-fences is not making them too different from what
> they were? Going from a purely synchronisation primitive more towards
> scheduling paradigms. Just to brainstorm whether there will be any
> unintended consequences. I should mention this in your RFC thread
> actually.

Perhaps "deadline" isn't quite the right name, but I haven't thought
of anything better.  It is really a hint to the fence signaller about
how soon it is interested in a result so the driver can factor that
into freq scaling decisions.  Maybe "goal" or some other term would be
better?

I guess that can factor into scheduling decisions as well.. but we
already have priority for that.  My main interest is freq mgmt.

(Thankfully we don't have performance and efficiency cores to worry
about, like CPUs ;-))

> > A pretty common challenging usecase is still the single fullscreen
> > game, where scheduling isn't the problem, but landing at an
> > appropriate GPU freq absolutely is.  (UI workloads are perhaps more
> > interesting from a scheduler standpoint, but they generally aren't
> > challenging from a load/freq standpoint.)
>
> Challenging as in picking the right operating point? Latency might be
> impacted (and so user-perceived UI smoothness) due to missing waitboost
> for anything syncobj related. I don't know if anything to measure that
> exists currently though. Assuming it is measurable, then the question
> would be whether it is perceivable.
> > Fwiw, the original motivation of the series was to implement something
> > akin to i915 pageflip boosting without having to abandon the atomic
> > helpers.  (And, I guess it would also let i915 preserve that feature
> > if it switched to atomic helpers.. I'm unsure if there are still other
> > things blocking i915's migration.)
>
> Question for display folks I guess.
>
> >> Then if we fast forward to a world where schedulers perhaps become fully
> >> deadline aware (we even had this for i915 a few years back), then the
> >> question will be whether equating waits with immediate deadlines still
> >> works. Maybe not too well, because we wouldn't have the ability to
> >> distinguish the "someone is waiting" signal from the otherwise
> >> propagated deadlines.
> >
> > Is there any other way to handle a wait boost than expressing it as an
> > ASAP deadline?
>
> A leading question or just a question? Nothing springs to my mind at the
> moment.

Just a question.  The immediate deadline is the only thing that makes
sense to me, but that could be because I'm looking at it from the
perspective of also trying to handle the case where missing vblank
reduces utilization and provides the wrong signal to gpufreq.. i915
already has a way to handle this internally, but it involves bypassing
the atomic helpers, which isn't a thing I want to encourage other
drivers to do.  And completely doesn't work for situations where the
gpu and display are separate devices.

BR,
-R

> Regards,
>
> Tvrtko


Re: [PATCH 0/4] drm/displayid: use primary use case to figure out non-desktop

2023-02-20 Thread Dmitry Osipenko
On 2/16/23 23:44, Jani Nikula wrote:
> Mostly this is prep work and plumbing for easier use of displayid
> structure version and primary use case for parsing the displayid blocks,
> but it can be nicely used for figuring out non-desktop too.
> 
> Completely untested. :)
> 
> BR,
> Jani.
> 
> Cc: Iaroslav Boliukin 
> Cc: Dmitry Osipenko 
> 
> Jani Nikula (4):
>   drm/displayid: add displayid_get_header() and check bounds better
>   drm/displayid: return struct displayid_header from
> validate_displayid()
>   drm/displayid: provide access to DisplayID version and primary use
> case
>   drm/edid: update non-desktop use also from DisplayID
> 
>  drivers/gpu/drm/drm_displayid.c | 62 -
>  drivers/gpu/drm/drm_edid.c  | 25 +
>  include/drm/drm_displayid.h | 12 ++-
>  3 files changed, 89 insertions(+), 10 deletions(-)
> 

It works now without the EDID quirk, thanks!

Tested-by: Dmitry Osipenko 

-- 
Best regards,
Dmitry



Re: [PATCH 17/17] drm/cirrus: Use VGA macro constants to unblank

2023-02-20 Thread Gerd Hoffmann
On Mon, Feb 20, 2023 at 03:22:03PM +0100, Thomas Zimmermann wrote:
> Hi
> 
> On 16.02.23 at 12:33, Gerd Hoffmann wrote:
> > On Wed, Feb 15, 2023 at 05:15:17PM +0100, Thomas Zimmermann wrote:
> > > Set the VGA bit for unblanking with macro constants instead of magic
> > > values. No functional changes.
> > 
> > blank/unblank should work similar to bochs (see commit 250e743915d4),
> > that is maybe a nice thing to add if you modernize the driver anyway.
> > 
> > take care,
> >Gerd
> > 
> 
> Do you have comments on the other patches?

Checked briefly only, looked sane overall.  Seems the blit and format
conversion helpers improved a lot since I've added them initially (don't
follow drm that closely any more, busy with other stuff), nice to see
cirrus being updated to that and getting dirty tracking support.

Acked-by: Gerd Hoffmann 

take care,
  Gerd



Re: [PATCH 01/27] habanalabs/gaudi2: increase user interrupt grace time

2023-02-20 Thread Stanislaw Gruszka
On Sun, Feb 12, 2023 at 10:44:28PM +0200, Oded Gabbay wrote:
> From: Ofir Bitton 
> 
> Currently we support scenarios where a timestamp registration request
> of a certain offset is received during the interrupt handling of the
> same offset. In this case we give a grace period of up to 100us for
> the interrupt handler to finish.
> It seems that sometimes the interrupt handling takes longer than expected,
> and therefore this path should be optimized. Until that happens, let's
> increase the grace period in order not to reach the timeout, which would
> cause the user call to be rejected.
> 
> Signed-off-by: Ofir Bitton 
> Reviewed-by: Oded Gabbay 
> Signed-off-by: Oded Gabbay 

Reviewed-by: Stanislaw Gruszka  for the 
whole series.



Re: [Intel-gfx] [RFC 6/9] drm/syncobj: Mark syncobj waits as external waiters

2023-02-20 Thread Rob Clark
On Mon, Feb 20, 2023 at 5:19 AM Tvrtko Ursulin
 wrote:
>
>
> On 18/02/2023 19:56, Rob Clark wrote:
> > On Thu, Feb 16, 2023 at 2:59 AM Tvrtko Ursulin
> >  wrote:
> >>
> >> From: Tvrtko Ursulin 
> >>
> >> Use the previously added dma-fence tracking of explicit waiters.
> >>
> >> Signed-off-by: Tvrtko Ursulin 
> >> ---
> >>   drivers/gpu/drm/drm_syncobj.c | 6 +++---
> >>   1 file changed, 3 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> >> index 0c2be8360525..776b90774a64 100644
> >> --- a/drivers/gpu/drm/drm_syncobj.c
> >> +++ b/drivers/gpu/drm/drm_syncobj.c
> >> @@ -1065,9 +1065,9 @@ static signed long 
> >> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
> >>  if ((flags & 
> >> DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
> >>  dma_fence_is_signaled(fence) ||
> >>  (!entries[i].fence_cb.func &&
> >> -dma_fence_add_callback(fence,
> >> -   &entries[i].fence_cb,
> >> -   syncobj_wait_fence_func))) {
> >> +dma_fence_add_wait_callback(fence,
> >> +&entries[i].fence_cb,
> >> +syncobj_wait_fence_func))) {
> >
> > I think this isn't really what you want if count > 1, because you
> > wouldn't be notifying the fence signaler of fence n+1 until the wait
> > on fence n completed
>
> Are you sure? After some staring all I can see is that all callbacks are
> added before the first sleep.

Ahh, yes, you are right

BR,
-R


Re: [PATCH v2 3/9] drm/vc4: hdmi: Add Broadcast RGB property to allow override of RGB range

2023-02-20 Thread Dave Stevenson
Hi Hans

On Sat, 18 Feb 2023 at 11:33, Hans Verkuil  wrote:
>
> Hi Maxime, Dave,
>
> On 26/01/2023 14:46, Maxime Ripard wrote:
> > From: Dave Stevenson 
> >
> > Copy Intel's "Broadcast RGB" property semantics to add manual override
> > of the HDMI pixel range for monitors that don't abide by the content
> > of the AVI Infoframe.
>
> Do we have to copy that property as-is?

Firstly I'll agree with your later comment that having this control
allows testing of a range of output modes, and working around HDMI
sinks that have dodgy implementations.
(In our vendor kernel we now also have a property to override the
kernel chosen output format to enable testing of YCbCr4:4:4 and 4:2:2
output).

The DRM subsystem has the requirement for an open-source userspace
project to be using any new property before it will be merged [1].
This property already exists for i915 and gma-500, and therefore avoids
that requirement.

There are objections to the UAPI for Broadcast RGB [2], but if it's
good enough for the existing implementations then there can be no
objection to it being implemented in the same way for other drivers.
Otherwise it is a missing feature of the DRM API, and the linked
discussion is realistically at least a year away from being resolved.
Why bury our heads in the sand for that period?

[1] 
https://dri.freedesktop.org/docs/drm/gpu/drm-uapi.html#open-source-userspace-requirements
[2] https://lists.freedesktop.org/archives/dri-devel/2023-February/391061.html

> First of all, I think this should really be a drm-level property, rather than
> a driver property: RGB Quantization Range mismatches are the bane of my life,
> and I think a way to override this would help everyone.

Linked to above, if it were the preferred method for controlling this
then I would expect it to become a drm-level property.

> Secondly, I hate the name they came up with: 'Broadcast RGB' is pretty 
> meaningless.
> Can't we stick to something closer to what the CTA-861/HDMI specs use, which 
> is
> 'RGB Quantization Range'? So either use that, or just 'RGB Range'.
>
> In addition, 'Limited 16:235' should just be 'Limited' since the actual range
> depends on the bits-per-color-component.

It's documented UAPI with those names [3], so any change would
be a change to user-space's expectations and a regression. At least by
sticking with the same names then any user space implementation that
exists for i915 or gma-500 will also work in the same way on vc4.

[3] 
https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#existing-kms-properties
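
As a usage illustration (sketch only; the property lookup is elided and
the enum values are as documented for the existing i915/gma500
property: 0 = Automatic, 1 = Full, 2 = Limited 16:235):

	#include <xf86drmMode.h>

	int set_limited_range(int fd, uint32_t connector_id, uint32_t prop_id)
	{
		return drmModeConnectorSetProperty(fd, connector_id,
						   prop_id, 2);
	}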

> >
> > Signed-off-by: Dave Stevenson 
> > Signed-off-by: Maxime Ripard 
> > ---
> >  drivers/gpu/drm/vc4/vc4_hdmi.c | 97 
> > --
> >  drivers/gpu/drm/vc4/vc4_hdmi.h |  9 
> >  2 files changed, 102 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
> > index 4b3bf77bb5cd..78749c6fa837 100644
> > --- a/drivers/gpu/drm/vc4/vc4_hdmi.c
> > +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
> > @@ -150,10 +150,16 @@ static bool vc4_hdmi_mode_needs_scrambling(const 
> > struct drm_display_mode *mode,
> >  }
> >
> >  static bool vc4_hdmi_is_full_range_rgb(struct vc4_hdmi *vc4_hdmi,
> > -const struct drm_display_mode *mode)
> > +struct vc4_hdmi_connector_state 
> > *vc4_state)
> >  {
> > + const struct drm_display_mode *mode = &vc4_hdmi->saved_adjusted_mode;
> >   struct drm_display_info *display = &vc4_hdmi->connector.display_info;
> >
> > + if (vc4_state->broadcast_rgb == VC4_HDMI_BROADCAST_RGB_LIMITED)
> > + return false;
> > + else if (vc4_state->broadcast_rgb == VC4_HDMI_BROADCAST_RGB_FULL)
> > + return true;
> > +
> >   return !display->is_hdmi ||
> >   drm_default_rgb_quant_range(mode) == 
> > HDMI_QUANTIZATION_RANGE_FULL;
> >  }
> > @@ -524,8 +530,12 @@ static int vc4_hdmi_connector_atomic_check(struct 
> > drm_connector *connector,
> >  {
> >   struct drm_connector_state *old_state =
> >   drm_atomic_get_old_connector_state(state, connector);
> > + struct vc4_hdmi_connector_state *old_vc4_state =
> > + conn_state_to_vc4_hdmi_conn_state(old_state);
> >   struct drm_connector_state *new_state =
> >   drm_atomic_get_new_connector_state(state, connector);
> > + struct vc4_hdmi_connector_state *new_vc4_state =
> > + conn_state_to_vc4_hdmi_conn_state(new_state);
> >   struct drm_crtc *crtc = new_state->crtc;
> >
> >   if (!crtc)
> > @@ -558,6 +568,7 @@ static int vc4_hdmi_connector_atomic_check(struct 
> > drm_connector *connector,
> >   }
> >
> >   if (old_state->colorspace != new_state->colorspace ||
> > + old_vc4_state->broadcast_rgb != new_vc4_state->broadcast_rgb ||
>
> The problem with this is that this will cause a mode change, even though all
> that is necessary is to update the csc matrix and AVI 

Re: [PATCH drm-next v2 04/16] maple_tree: add flag MT_FLAGS_LOCK_NONE

2023-02-20 Thread Matthew Wilcox
On Mon, Feb 20, 2023 at 03:00:59PM +0100, Danilo Krummrich wrote:
> On 2/17/23 20:38, Matthew Wilcox wrote:
> > On Fri, Feb 17, 2023 at 02:44:10PM +0100, Danilo Krummrich wrote:
> > > Generic components making use of the maple tree (such as the
> > > DRM GPUVA Manager) delegate the responsibility of ensuring mutual
> > > exclusion to their users.
> > > 
> > > While such components could inherit the concept of an external lock,
> > > some users might just serialize the access to the component and hence to
> > > the internal maple tree.
> > > 
> > > In order to allow such use cases, add a new flag MT_FLAGS_LOCK_NONE to
> > > indicate not to do any internal lockdep checks.
> > 
> > I'm really against this change.
> > 
> > First, we really should check that users have their locking right.
> > It's bitten us so many times when they get it wrong.
> 
> In case of the DRM GPUVA manager, some users might serialize the access to
> the GPUVA manager and hence to its maple tree instances, e.g. through the
> drm_gpu_scheduler. In such a case ensuring to hold a lock would be a bit
> pointless and I wouldn't really know how to "sell" this to potential users
> of the GPUVA manager.

This is why we like people to use the spinlock embedded in the tree.
There's nothing for the user to care about.  If the access really is
serialised, acquiring/releasing the uncontended spinlock is a minimal
cost compared to all the other things that will happen while modifying
the tree.
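
A sketch of what that looks like for such a serialised user
(illustrative names):

	/*
	 * With the default flags (no external-lock mode) the mtree_*()
	 * entry points take the embedded spinlock internally, so a
	 * fully serialised caller only pays an uncontended lock/unlock
	 * per operation.
	 */
	static DEFINE_MTREE(sample_tree);

	static int sample_insert(unsigned long first, unsigned long last,
				 void *p)
	{
		return mtree_store_range(&sample_tree, first, last, p,
					 GFP_KERNEL);
	}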

> > Second, having a lock allows us to defragment the slab cache.  The
> > patches to do that haven't gone anywhere recently, but if we drop the
> > requirement now, we'll never be able to compact ranges of memory that
> > have slabs allocated to them.
> > 
> 
> Not sure if I get that, do you mind explaining a bit how this would affect
> other users of the maple tree, such as my use case, the GPUVA manager?

When we want to free a slab in order to defragment memory, we need
to relocate all the objects allocated within that slab.  To do that
for the maple tree node cache, for each node in this particular slab,
we'll need to walk up to the top of the tree and lock it.  We can then
allocate a new node from a different slab, change the parent to point
to the new node and drop the lock.  After an RCU delay, we can free the
slab and create a larger contiguous block of memory.

As I said, this is somewhat hypothetical in that there's no current
code in the tree to reclaim slabs when we're trying to defragment
memory.  And that's because it's hard to do.  The XArray and maple
tree were designed to make it possible for their slabs.


Re: [PATCH drm-next v2 04/16] maple_tree: add flag MT_FLAGS_LOCK_NONE

2023-02-20 Thread Danilo Krummrich

On 2/17/23 20:38, Matthew Wilcox wrote:

On Fri, Feb 17, 2023 at 02:44:10PM +0100, Danilo Krummrich wrote:

Generic components making use of the maple tree (such as the
DRM GPUVA Manager) delegate the responsibility of ensuring mutual
exclusion to their users.

While such components could inherit the concept of an external lock,
some users might just serialize the access to the component and hence to
the internal maple tree.

In order to allow such use cases, add a new flag MT_FLAGS_LOCK_NONE to
indicate not to do any internal lockdep checks.


I'm really against this change.

First, we really should check that users have their locking right.
It's bitten us so many times when they get it wrong.


In case of the DRM GPUVA manager, some users might serialize the access 
to the GPUVA manager and hence to its maple tree instances, e.g. 
through the drm_gpu_scheduler. In such a case ensuring to hold a lock 
would be a bit pointless and I wouldn't really know how to "sell" this 
to potential users of the GPUVA manager.




Second, having a lock allows us to defragment the slab cache.  The
patches to do that haven't gone anywhere recently, but if we drop the
requirement now, we'll never be able to compact ranges of memory that
have slabs allocated to them.



Not sure if I get that, do you mind explaining a bit how this would 
affect other users of the maple tree, such as my use case, the GPUVA 
manager?




Re: [PATCH drm-next v2 03/16] maple_tree: split up MA_STATE() macro

2023-02-20 Thread Danilo Krummrich

On 2/17/23 19:34, Liam R. Howlett wrote:

* Danilo Krummrich  [230217 08:44]:

Split up the MA_STATE() macro such that components using the maple tree
can easily inherit from struct ma_state and build custom tree walk
macros to hide their internals from users.

Example:

struct sample_iter {
struct ma_state mas;
struct sample_mgr *mgr;
struct sample_entry *entry;
};

#define SAMPLE_ITER(name, __mgr) \
	struct sample_iter name = { \
		.mas = __MA_STATE(&(__mgr)->mt, 0, 0), \
		.mgr = __mgr, \
		.entry = NULL, \
	}


I see this patch is to allow for anonymous maple states; this looks
good.

I've a lengthy comment about the iterator that I'm adding here to head
off anyone that may copy your example below.



#define sample_iter_for_each_range(it__, start__, end__) \
	for ((it__).mas.index = start__, (it__).entry = mas_find(&(it__).mas, end__ - 1); \
	     (it__).entry; (it__).entry = mas_find(&(it__).mas, end__ - 1))


I see you've added something like the above in your patch set as well.
I'd like to point out that the index isn't the only state information
that needs to be altered here, and in fact, this could go very wrong.

The maple state has a node and an offset within that node.  If you set
the index to lower than the current position of your iterator and call
mas_find() then what happens is somewhat undefined.  I expect you will
get the wrong value (most likely either the current value or the very
next one that the iterator is already pointing to).  I believe you have
been using a fresh maple state for each iterator in your patches, but I
haven't had a deep look into your code yet.


Yes, I'm aware that I'd need to reset the whole iterator in order to 
re-use it.


Regarding the other considerations of the iterator design please see my 
answer to Matthew.




We have methods of resetting the iterator and setting the range (mas_set()
and mas_set_range()) which are safe for what you are doing, but they
will start the walk from the root node to the index again.

So, if you know what you are doing is safe, then the way you have
written it will work, but it's worth mentioning that this could occur.
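
For example, something along these lines (a sketch using the
sample_iter from above):

	/*
	 * Rather than poking ->index directly, reset the maple state
	 * with mas_set_range() before walking a new range.  The next
	 * mas_find() then restarts from the root, which is the safe
	 * behaviour.
	 */
	static inline void sample_iter_rewind(struct sample_iter *it,
					      unsigned long start,
					      unsigned long end)
	{
		mas_set_range(&it->mas, start, end - 1);
		it->entry = NULL;
	}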

It is also worth pointing out that it would be much safer to use a
function to do the above so you get type safety.. and I was asked to add
this to the VMA interface by Linus [1], which is on its way upstream [2].

1. 
https://lore.kernel.org/linux-mm/CAHk-=wg9wqxbgkndkd2bqocnn73rdswuwsavbb7t-tekyke...@mail.gmail.com/
2. 
https://lore.kernel.org/linux-mm/20230120162650.984577-1-liam.howl...@oracle.com/


You mean having wrappers like sample_find() instead of directly using 
mas_find()?






Signed-off-by: Danilo Krummrich 
---
  include/linux/maple_tree.h | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index e594db58a0f1..ca04c900e51a 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -424,8 +424,8 @@ struct ma_wr_state {
  #define MA_ERROR(err) \
((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
  
-#define MA_STATE(name, mt, first, end)	\
-   struct ma_state name = {\
+#define __MA_STATE(mt, first, end) \
+   {   \
.tree = mt, \
.index = first, \
.last = end,\
@@ -435,6 +435,9 @@ struct ma_wr_state {
.alloc = NULL,  \
}
  
+#define MA_STATE(name, mt, first, end)	\
+   struct ma_state name = __MA_STATE(mt, first, end)
+
  #define MA_WR_STATE(name, ma_state, wr_entry) \
struct ma_wr_state name = { \
.mas = ma_state,\
--
2.39.1







Re: [PATCH drm-next v2 03/16] maple_tree: split up MA_STATE() macro

2023-02-20 Thread Danilo Krummrich

On 2/17/23 20:45, Matthew Wilcox wrote:

On Fri, Feb 17, 2023 at 02:44:09PM +0100, Danilo Krummrich wrote:

#define SAMPLE_ITER(name, __mgr) \
struct sample_iter name = { \
.mas = __MA_STATE(&(__mgr)->mt, 0, 0),


This is usually called MA_STATE_INIT()


Yep, that's better.




#define sample_iter_for_each_range(it__, start__, end__) \
for ((it__).mas.index = start__, (it__).entry = mas_find(&(it__).mas, end__ - 1); \
 (it__).entry; (it__).entry = mas_find(&(it__).mas, end__ - 1))


This is a bad iterator design.  It's usually best to do this:

struct sample *sample;
SAMPLE_ITERATOR(si, min);

sample_iter_for_each(&si, sample, max) {
frob(mgr, sample);
}




The reason why I don't set index (and max) within SAMPLE_ITER() is that 
the range to iterate might not yet be known at that time, so I thought 
it could just be set in sample_iter_for_each_range().


However, I see that this might lead users to assume that it's safe to
iterate a range based on the same iterator instance multiple times.
Instead users should maybe move the tree walk to another function once
the range is known.


The reason for the payload structure to be part of the iterator is that 
I have two maple trees in the GPUVA manager and hence two different 
payload types. Within the iterator structure they're just within a union 
allowing me to implement the tree walk macro just once rather than twice.


Anyway, I feel like your approach looks cleaner, hence I'll change it.
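
Roughly like this, I guess (a sketch only; the names and how the owning
manager is captured are assumptions, not settled interface):

	#define SAMPLE_ITERATOR(name, mgr, index)			\
		struct sample_iter name = {				\
			.mas = __MA_STATE(&(mgr)->mt, index, index),	\
			.mgr = (mgr),					\
		}

	#define sample_iter_for_each(it, sample, max)			\
		while (((sample) = mas_find(&(it)->mas, (max))) != NULL)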


I don't mind splitting apart MA_STATE_INIT from MA_STATE, and if you
do that, we can also use it in VMA_ITERATOR.





Re: [PATCH 17/17] drm/cirrus: Use VGA macro constants to unblank

2023-02-20 Thread Thomas Zimmermann

Hi

On 16.02.23 at 12:33, Gerd Hoffmann wrote:

On Wed, Feb 15, 2023 at 05:15:17PM +0100, Thomas Zimmermann wrote:

Set the VGA bit for unblanking with macro constants instead of magic
values. No functional changes.


blank/unblank should work similar to bochs (see commit 250e743915d4),
that is maybe a nice thing to add if you modernize the driver anyway.

take care,
   Gerd



Do you have comments on the other patches?

Best regards
Thomas

--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev




Re: [PATCH 3/6] drm/mgag200: Remove disable handling from atomic_update

2023-02-20 Thread Thomas Zimmermann

This mail never made it to dri-devel.

On 17.02.23 at 14:26, Javier Martinez Canillas wrote:

Thomas Zimmermann  writes:


The primary plane has the atomic_disable helper set, so atomic_update
won't be called if the plane gets disabled. Remove the respective branch
from the helper.

Signed-off-by: Thomas Zimmermann 
---


Reviewed-by: Javier Martinez Canillas 

Best regards,
Javier



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev




[PATCH 6.1 050/118] drm: Disable dynamic debug as broken

2023-02-20 Thread Greg Kroah-Hartman
From: Ville Syrjälä 

commit bb2ff6c27bc9e1da4d3ec5e7b1d6b9df1092cb5a upstream.

CONFIG_DRM_USE_DYNAMIC_DEBUG breaks debug prints for (at least modular)
drm drivers. The debug prints can be reinstated by manually frobbing
/sys/module/drm/parameters/debug after the fact, but at that point the
damage is done and all debugs from driver probe are lost. This makes
drivers totally undebuggable.

There's a more complete fix in progress [1], with further details, but
we need this fixed in stable kernels. Mark the feature as broken and
disable it by default, with hopes distros follow suit and disable it as
well.

[1] https://lore.kernel.org/r/20230125203743.564009-1-jim.cro...@gmail.com

Fixes: 84ec67288c10 ("drm_print: wrap drm_*_dbg in dyndbg descriptor factory 
macro")
Cc: Jim Cromie 
Cc: Greg Kroah-Hartman 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: dri-devel@lists.freedesktop.org
Cc:  # v6.1+
Signed-off-by: Ville Syrjälä 
Acked-by: Greg Kroah-Hartman 
Acked-by: Jim Cromie 
Acked-by: Maxime Ripard 
Signed-off-by: Jani Nikula 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20230207143337.2126678-1-jani.nik...@intel.com
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/gpu/drm/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 315cbdf61979..9abfb482b615 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -53,7 +53,8 @@ config DRM_DEBUG_MM
 
 config DRM_USE_DYNAMIC_DEBUG
bool "use dynamic debug to implement drm.debug"
-   default y
+   default n
+   depends on BROKEN
depends on DRM
depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
depends on JUMP_LABEL
-- 
2.39.1





Re: [Intel-gfx] [RFC 5/9] dma-fence: Track explicit waiters

2023-02-20 Thread Tvrtko Ursulin



On 18/02/2023 19:54, Rob Clark wrote:

On Thu, Feb 16, 2023 at 3:00 AM Tvrtko Ursulin
 wrote:


From: Tvrtko Ursulin 

Track how many callers are explicitly waiting on a fence to signal and
allow querying that via the new dma_fence_wait_count() API.

This provides infrastructure on top of which generic "waitboost" concepts
can be implemented by individual drivers. Wait-boosting is any reactive
activity, such as raising the GPU clocks, which happens while there are
active external waiters.

Signed-off-by: Tvrtko Ursulin 
---
  drivers/dma-buf/dma-fence.c   | 98 +--
  drivers/gpu/drm/i915/gt/intel_engine_pm.c |  1 -
  include/linux/dma-fence.h | 15 
  3 files changed, 87 insertions(+), 27 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index ea4a1f82c9bf..bdba5a8e21b1 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -344,6 +344,25 @@ void __dma_fence_might_wait(void)
  }
  #endif

+static void incr_wait_count(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+   lockdep_assert_held(fence->lock);
+
+   __set_bit(DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, &cb->flags);
+   fence->waitcount++;
+   WARN_ON_ONCE(!fence->waitcount);
+}
+
+static void decr_wait_count(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+   lockdep_assert_held(fence->lock);
+
+   if (__test_and_clear_bit(DMA_FENCE_CB_FLAG_WAITCOUNT_BIT, &cb->flags)) {
+   WARN_ON_ONCE(!fence->waitcount);
+   fence->waitcount--;
+   }
+}
+
  void __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
  {
 lockdep_assert_held(fence->lock);
@@ -363,6 +382,7 @@ __dma_fence_signal__notify(struct dma_fence *fence,
 lockdep_assert_held(fence->lock);

 list_for_each_entry_safe(cur, tmp, list, node) {
+   decr_wait_count(fence, cur);
 INIT_LIST_HEAD(>node);
 cur->func(fence, cur);
 }
@@ -629,11 +649,44 @@ void dma_fence_enable_sw_signaling(struct dma_fence 
*fence)
 unsigned long flags;

 spin_lock_irqsave(fence->lock, flags);
+   fence->waitcount++;
+   WARN_ON_ONCE(!fence->waitcount);
 __dma_fence_enable_signaling(fence);
 spin_unlock_irqrestore(fence->lock, flags);
  }
  EXPORT_SYMBOL(dma_fence_enable_sw_signaling);

+static int add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
+   dma_fence_func_t func, bool wait)
+{
+   unsigned long flags;
+   int ret = 0;
+
+   __dma_fence_cb_init(cb, func);
+
+   if (WARN_ON(!fence || !func))
+   return -EINVAL;
+
+   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+   return -ENOENT;
+
+   spin_lock_irqsave(fence->lock, flags);
+
+   if (wait)
+   incr_wait_count(fence, cb);
+
+   if (__dma_fence_enable_signaling(fence)) {
+   list_add_tail(&cb->node, &fence->cb_list);
+   } else {
+   decr_wait_count(fence, cb);
+   ret = -ENOENT;
+   }
+
+   spin_unlock_irqrestore(fence->lock, flags);
+
+   return ret;
+}
+
  /**
   * dma_fence_add_callback - add a callback to be called when the fence
   * is signaled
@@ -659,31 +712,18 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
  int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
dma_fence_func_t func)
  {
-   unsigned long flags;
-   int ret = 0;
-
-   __dma_fence_cb_init(cb, func);
-
-   if (WARN_ON(!fence || !func))
-   return -EINVAL;
-
-   if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-   return -ENOENT;
-
-   spin_lock_irqsave(fence->lock, flags);
-
-   if (__dma_fence_enable_signaling(fence)) {
-   list_add_tail(&cb->node, &fence->cb_list);
-   } else {
-   ret = -ENOENT;
-   }
-
-   spin_unlock_irqrestore(fence->lock, flags);
-
-   return ret;
+   return add_callback(fence, cb, func, false);
  }
  EXPORT_SYMBOL(dma_fence_add_callback);

+int dma_fence_add_wait_callback(struct dma_fence *fence,
+   struct dma_fence_cb *cb,
+   dma_fence_func_t func)
+{
+   return add_callback(fence, cb, func, true);
+}
+EXPORT_SYMBOL(dma_fence_add_wait_callback);
+
  /**
   * dma_fence_get_status - returns the status upon completion
   * @fence: the dma_fence to query
@@ -736,8 +776,10 @@ dma_fence_remove_callback(struct dma_fence *fence, struct 
dma_fence_cb *cb)
 spin_lock_irqsave(fence->lock, flags);

 ret = !list_empty(&cb->node);
-   if (ret)
+   if (ret) {
+   decr_wait_count(fence, cb);
 list_del_init(>node);
+   }

 spin_unlock_irqrestore(fence->lock, flags);

@@ -795,6 +837,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, 
signed long timeout)

 __dma_fence_cb_init(&cb.base, 

Re: [Intel-gfx] [RFC 6/9] drm/syncobj: Mark syncobj waits as external waiters

2023-02-20 Thread Tvrtko Ursulin



On 18/02/2023 19:56, Rob Clark wrote:

On Thu, Feb 16, 2023 at 2:59 AM Tvrtko Ursulin
 wrote:


From: Tvrtko Ursulin 

Use the previously added dma-fence tracking of explicit waiters.

Signed-off-by: Tvrtko Ursulin 
---
  drivers/gpu/drm/drm_syncobj.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 0c2be8360525..776b90774a64 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1065,9 +1065,9 @@ static signed long drm_syncobj_array_wait_timeout(struct 
drm_syncobj **syncobjs,
 if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
 dma_fence_is_signaled(fence) ||
 (!entries[i].fence_cb.func &&
-dma_fence_add_callback(fence,
-   &entries[i].fence_cb,
-   syncobj_wait_fence_func))) {
+dma_fence_add_wait_callback(fence,
+&entries[i].fence_cb,
+syncobj_wait_fence_func))) {


I think this isn't really what you want if count > 1, because you
wouldn't be notifying the fence signaler of fence n+1 until the wait
on fence n completed


Are you sure? After some staring all I can see is that all callbacks are 
added before the first sleep.


Regards,

Tvrtko


Re: [PATCH] drm/i915/guc: avoid FIELD_PREP warning

2023-02-20 Thread Michal Wajdeczko



On 17.02.2023 13:46, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> With gcc-7 and earlier, there are lots of warnings like
> 
> In file included from :0:0:
> In function '__guc_context_policy_add_priority.isra.66',
> inlined from '__guc_context_set_prio.isra.67' at 
> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3,
> inlined from 'guc_context_set_prio' at 
> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2:
> include/linux/compiler_types.h:399:38: error: call to 
> '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is 
> not constant
>   _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
>   ^
> ...
> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion 
> of macro 'FIELD_PREP'
>FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
>^~
> 
> Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning.
> 
> Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3")
> Signed-off-by: Arnd Bergmann 

Reviewed-by: Michal Wajdeczko 

> ---
>  drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h 
> b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
> index 58012edd4eb0..4f4f53c42a9c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
> @@ -29,9 +29,9 @@
>   */
>  
>  #define GUC_KLV_LEN_MIN  1u
> -#define GUC_KLV_0_KEY  (0xffff << 16)
> -#define GUC_KLV_0_LEN  (0xffff << 0)
> -#define GUC_KLV_n_VALUE  (0xffffffff << 0)
> +#define GUC_KLV_0_KEY  (0xffffu << 16)
> +#define GUC_KLV_0_LEN  (0xffffu << 0)
> +#define GUC_KLV_n_VALUE  (0xffffffffu << 0)
>  
>  /**
>   * DOC: GuC Self Config KLVs
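
For illustration, the pattern this enables (sketch; the DEMO_* names
are made up):

	#include <linux/bitfield.h>

	#define DEMO_KEY_MASK	(0xffffu << 16)

	/*
	 * FIELD_PREP() compile-time checks that the mask is constant;
	 * with gcc-7 and earlier the signed (0xffff << 16) form trips
	 * that check in some inlining contexts, while the u-suffixed
	 * constant keeps the mask an unsigned constant expression.
	 */
	static inline u32 demo_pack_key(u32 key)
	{
		return FIELD_PREP(DEMO_KEY_MASK, key);
	}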


Re: [PATCH v4 0/6] drm: lcdif: Add i.MX93 LCDIF support

2023-02-20 Thread Liu Ying
On Mon, 2023-02-20 at 11:16 +0100, Alexander Stein wrote:
> Hi Liu,

Hi Alexander,

> 
> On Monday, 20 February 2023, 09:55:19 CET, Alexander Stein wrote:
> > Hi Liu,
> > 
> > On Friday, 17 February 2023, 09:59:14 CET, Liu Ying wrote:
> > > On Fri, 2023-02-17 at 09:18 +0100, Alexander Stein wrote:
> > > > Hi Liu,
> > > 
> > > Hi Alexander,
> > > 
> > > > On Friday, 17 February 2023, 07:54:01 CET, Liu Ying wrote:
> > > > > Hi,
> > > > > 
> > > > > This patch set aims to add i.MX93 LCDIF display controller
> > > > > support
> > > > > in the existing LCDIF DRM driver.  The LCDIF embedded in
> > > > > i.MX93 SoC
> > > > > is essentially the same to those embedded in i.MX8mp
> > > > > SoC.  Through
> > > > > internal bridges, i.MX93 LCDIF may drive a MIPI DSI display
> > > > > or a LVDS
> > > > > display or a parallel display.
> > > > > 
> > > > > Patch 1/6 adds device tree binding support for i.MX93 LCDIF
> > > > > in the
> > > > > existing fsl,lcdif.yaml.
> > > > > 
> > > > > Patch 2/6 drops lcdif->bridge NULL pointer check as a cleanup
> > > > > patch.
> > > > > 
> > > > > Patch 3/6~5/6 prepare for adding i.MX93 LCDIF support step by
> > > > > step.
> > > > > 
> > > > > Patch 6/6 adds i.MX93 LCDIF compatible string as the last
> > > > > step of
> > > > > adding i.MX93 LCDIF support.
> > > > 
> > > > Thanks for the series. I could test this on my
> > > > TQMa93xxLA/MBa93xxCA with
> > > > a
> > > > single LVDS display attached, so no DSI or parallel display.
> > > > Hence I
> > > > could
> > > > not test the bus format and flags checks, but they look okay.
> > > > So you can add
> > > > Tested-by: Alexander Stein 
> > > > to the whole series as well.
> > > 
> > > Thanks for your test.
> > > 
> > One thing I noticed is that sometimes it seems that before probing
> > lcdif my system completely freezes. Adding some debug output, it
> > seems there is some race condition during powering up the
> > IMX93_MEDIABLK_PD_LCDIF power domain. But adding more detailed
> > output made the problem go away. Did you notice something similar?
> > It might be a red herring though.
> > > 
> > > I don't see system freezing with my i.MX93 11x11 EVK when probing
> > > lcdif. I did try to boot the system several times. All look ok.
> > > This is
> > > a snippet of dmesg when lcdif probes:
> > > 
> > > --8<-------------
> > > [0.753083] Serial: 8250/16550 driver, 4 ports, IRQ sharing
> > > enabled
> > > [0.761669] SuperH (H)SCI(F) driver initialized
> > > [0.766523] msm_serial: driver initialized
[0.780523] printk: console [ttyLP0] enabled
[0.780523] ttyLP0 at MMIO 0x44380010 (irq = 16, base_baud = 1500000) is a FSL_LPUART
> > > [0.780523] printk: console [ttyLP0] enabled
> > > [0.788928] printk: bootconsole [lpuart32] disabled
> > > [0.788928] printk: bootconsole [lpuart32] disabled
> > > [0.804632] panel-simple lvds_panel: supply power not found,
> > > using
> > > dummy regulator
[0.814741] [drm] Initialized imx-lcdif 1.0.0 20220417 for 4ae30000.lcd-controller on minor 0
> > > [1.195930] Console: switching to colour frame buffer device
> > > 160x50
[1.218385] imx-lcdif 4ae30000.lcd-controller: [drm] fb0: imx-lcdifdrmfb frame buffer device
> > > [1.227099] cacheinfo: Unable to detect cache hierarchy for
> > > CPU 0
> > > [1.236725] loop: module loaded
> > > --8<-------------
> > > 
> > > ~300 milliseconds are consumed by the enablement delay required
> > > by the
> > > "boe,ev121wxm-n10-1850" LVDS panel I use.
> > 
> > It seems you have the drivers compiled in. I use modules in my case
> > and simple-panel as well. But this is unrelated, because lcdif_probe()
> > is yet to be called. Using the debug diff below I get the following
> > output:
> > 
> > [   16.97] imx93-blk-ctrl 4ac10000.system-controller: imx93_blk_ctrl_power_on: 1
> > [   16.122491] imx93-blk-ctrl 4ac10000.system-controller: imx93_blk_ctrl_power_on: 2
> > [   16.137766] imx93-blk-ctrl 4ac10000.system-controller: imx93_blk_ctrl_power_on: 3
> > [   16.154905] imx93-blk-ctrl 4ac10000.system-controller: imx93_blk_ctrl_power_on: 4
> > 
> > It seems setting BLK_CLK_EN blocks the whole system, even reading is
> > not possible. I don't have any details on the hardware, but it seems
> > that either some clock or power domain is not enabled. This can also
> > happen if I'm loading the lcdif module manually after boot. But I
> > can't detect any differences in /sys/kernel/debug/clk/clk_summary.
> 
> I think I found the cause. It's the maximum clock frequency for
> media_axi and media_apb. These clocks were not explicitly configured,
> most probably exceeding the maximum frequency allowed.

Thanks for sharing the cause. I use 400MHz media_axi and 133MHz

Re: [PATCH] drm/fb-helper: Try to protect cleanup against delayed setup

2023-02-20 Thread Thomas Zimmermann

Hi

On 17.02.23 at 20:47, Daniel Vetter wrote:

Some vague evidence suggests this can go wrong. Try to prevent it by
holding the right mutex and clearing ->deferred_setup to make sure we
later on don't accidentally try to re-register the fbdev when the
driver thought it had it all cleaned up already.

v2: I realized that this is fundamentally butchered, and CI complained
about lockdep splats. So limit the critical section again and just add
a few notes about what the proper fix is.

References: 
https://intel-gfx-ci.01.org/tree/linux-next/next-20201215/fi-byt-j1900/igt@i915_pm_...@module-reload.html
Signed-off-by: Daniel Vetter 
Cc: Ville Syrjälä 
Cc: Chris Wilson 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
---
  drivers/gpu/drm/drm_fb_helper.c | 6 ++
  drivers/gpu/drm/drm_fbdev_generic.c | 5 +
  2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 3e17261a12b6..2415a2c7ca44 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -545,6 +545,9 @@ EXPORT_SYMBOL(drm_fb_helper_alloc_info);
   * A wrapper around unregister_framebuffer, to release the fb_info
   * framebuffer device. This must be called before releasing all resources for
   * @fb_helper by calling drm_fb_helper_fini().
+ *
+ * Note that this is fundamentally racy on hotunload because it doesn't handle
+ * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead.
   */
  void drm_fb_helper_unregister_info(struct drm_fb_helper *fb_helper)
  {
@@ -558,6 +561,9 @@ EXPORT_SYMBOL(drm_fb_helper_unregister_info);
   * @fb_helper: driver-allocated fbdev helper, can be NULL
   *
   * This cleans up all remaining resources associated with @fb_helper.
+ *
+ * Note that this is fundamentally racy on hotunload because it doesn't handle
+ * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead.
   */
  void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
  {
diff --git a/drivers/gpu/drm/drm_fbdev_generic.c 
b/drivers/gpu/drm/drm_fbdev_generic.c
index 365f80717fa1..1618109592ce 100644
--- a/drivers/gpu/drm/drm_fbdev_generic.c
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -347,7 +347,12 @@ static void drm_fbdev_client_unregister(struct 
drm_client_dev *client)
  {
struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client);
  
+	mutex_lock(&fb_helper->lock);
+	fb_helper->deferred_setup = false;
+	mutex_unlock(&fb_helper->lock);


The unregister code runs as part of the client cleanup.  And the client 
also goes through a number of helpers that handle display hotplug and/or 
restore, which are affected by ->deferred_setup.  But it's all mutually 
exclusive. AFAICT nothing runs parallel to the unregister code.  See the 
use of dev->clientlist_mutex in drm_client.c.  The patch is not 
necessary IMHO.


Best regards
Thomas


+
if (fb_helper->info) {
+   /* drm_fbdev_fb_destroy() takes care of cleanup */
drm_fb_helper_unregister_info(fb_helper);
} else {
drm_client_release(_helper->client);


--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev




Re: [PATCH] backlight: qcom-wled: Add PMI8950 compatible

2023-02-20 Thread Daniel Thompson
On Mon, Feb 06, 2023 at 08:58:30PM +0100, Luca Weiss wrote:
> PMI8950 contains WLED of version 4. Add support for it to the driver.
>
> Signed-off-by: Luca Weiss 

Reviewed-by: Daniel Thompson 


> ---
> While adding dt-bindings and dts in a previous series I forgot to add the
> compatible to the driver. Fix that now.
> ---
>  drivers/video/backlight/qcom-wled.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/video/backlight/qcom-wled.c 
> b/drivers/video/backlight/qcom-wled.c
> index 527210e85795..5f504883aca5 100644
> --- a/drivers/video/backlight/qcom-wled.c
> +++ b/drivers/video/backlight/qcom-wled.c
> @@ -1731,6 +1731,7 @@ static int wled_remove(struct platform_device *pdev)
>
>  static const struct of_device_id wled_match_table[] = {
>   { .compatible = "qcom,pm8941-wled", .data = (void *)3 },
> + { .compatible = "qcom,pmi8950-wled", .data = (void *)4 },
>   { .compatible = "qcom,pmi8994-wled", .data = (void *)4 },
>   { .compatible = "qcom,pmi8998-wled", .data = (void *)4 },
>   { .compatible = "qcom,pm660l-wled", .data = (void *)4 },
>
> ---
> base-commit: 1b929c02afd37871d5afb9d498426f83432e71c2
> change-id: 20221226-msm8953-6-2-wled-5f966bfa4db3
>
> Best regards,
> --
> Luca Weiss 
>


Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Tvrtko Ursulin



On 17/02/2023 17:00, Rob Clark wrote:

On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin
 wrote:


[snip]


adapted from your patches..  I think the basic idea of deadlines
(which includes "I want it NOW" ;-)) isn't controversial, but the
original idea got caught up in some bikeshed (what about compositors
that wait on fences in userspace to decide which surfaces to update in
the next frame), plus me getting busy and generally not having a good
plan for how to leverage this from VM guests (which is becoming
increasingly important for CrOS).  I think I can build on some ongoing
virtgpu fencing improvement work to solve the latter.  But now that we
have a 2nd use-case for this, it makes sense to respin.


Sure, I was looking at the old version already. It is interesting. But
IMO it also needs quite a bit more work to get close to what the name of
the feature implies. It would need proper deadline based sched job
picking, and even then drm sched is mostly just a frontend. So once jobs
are past runnable status and handed over to the backend, without further
driver work it probably wouldn't be very effective beyond very lightly
loaded systems.


Yes, but all of that is not part of dma_fence ;-)


:) Okay.

Having said that, do we need to take a step back and think about whether 
adding deadlines to dma-fences makes them too different from what they 
were? It moves them from a purely synchronisation primitive towards 
scheduling paradigms. Just brainstorming whether there could be any 
unintended consequences. I should mention this in your RFC thread, 
actually.



A pretty common challenging usecase is still the single fullscreen
game, where scheduling isn't the problem, but landing at an
appropriate GPU freq absolutely is.  (UI workloads are perhaps more
interesting from a scheduler standpoint, but they generally aren't
challenging from a load/freq standpoint.)


Challenging as in picking the right operating point? Latency (and so 
user-perceived UI smoothness) might be impacted due to the missing 
waitboost for anything syncobj related. I don't know if anything to 
measure that exists currently though. Assuming it is measurable, the 
question would then be whether it is perceivable.

Fwiw, the original motivation of the series was to implement something
akin to i915 pageflip boosting without having to abandon the atomic
helpers.  (And, I guess it would also let i915 preserve that feature
if it switched to atomic helpers.. I'm unsure if there are still other
things blocking i915's migration.)


Question for display folks I guess.


Then if we fast forward to a world where schedulers perhaps become fully
deadline aware (we even had this for i915 a few years back), the
question will be whether equating waits with immediate deadlines still
works. Maybe not too well, because we wouldn't have the ability to
distinguish the "someone is waiting" signal from the otherwise
propagated deadlines.


Is there any other way to handle a wait boost than expressing it as an
ASAP deadline?


A leading question or just a question? Nothing springs to my mind at the 
moment.


Regards,

Tvrtko


Re: [PATCH v4 0/6] drm: lcdif: Add i.MX93 LCDIF support

2023-02-20 Thread Rasmus Villemoes
On 17/02/2023 09.18, Alexander Stein wrote:
> Hi Liu,
> 
> On Friday, 17 February 2023 at 07:54:01 CET, Liu Ying wrote:
>> Hi,
>>
>> This patch set aims to add i.MX93 LCDIF display controller support
>> in the existing LCDIF DRM driver.  The LCDIF embedded in i.MX93 SoC
>> is essentially the same as those embedded in i.MX8mp SoC.  Through
>> internal bridges, i.MX93 LCDIF may drive a MIPI DSI display or a LVDS
>> display or a parallel display.
>>
>> Patch 1/6 adds device tree binding support for i.MX93 LCDIF in the
>> existing fsl,lcdif.yaml.
>>
>> Patch 2/6 drops lcdif->bridge NULL pointer check as a cleanup patch.
>>
>> Patch 3/6~5/6 prepare for adding i.MX93 LCDIF support step by step.
>>
>> Patch 6/6 adds i.MX93 LCDIF compatible string as the last step of
>> adding i.MX93 LCDIF support.
> 
> Thanks for the series. I could test this on my TQMa93xxLA/MBa93xxCA with a 
> single LVDS display attached, so no DSI or parallel display. Hence I could 
> not 
> test the bus format and flags checks, but they look okay.
> So you can add
> Tested-by: Alexander Stein 
> to the whole series as well.
> 
> One thing I noticed is that sometimes it seems that before probing lcdif my 
> system completely freezes. Adding some debug output, it seems that during 
> powering up the IMX93_MEDIABLK_PD_LCDIF power domain there is some race 
> condition. But adding more detailed output made the problem go away.
> Did you notice something similar? It might be a red herring though.

Interesting. Sounds similar to what I encountered on several
imx8mp-based boards, both the NXP EVK and our custom design, running a
mainline U-Boot and downstream NXP kernel:

https://lore.kernel.org/u-boot/20220823133645.4046432-1-rasmus.villem...@prevas.dk/

I never really found a real solution, but as the hack I ended up
applying in U-Boot does involve some clock settings, and you apparently
now figured out some connection to "overclocking", I do think these
issues are related.

Rasmus



[PATCH v2 2/2] drm/panel: Add driver for Novatek NT36523

2023-02-20 Thread Jianhua Lu
Add a driver for panels using the Novatek NT36523 display driver IC.

Signed-off-by: Jianhua Lu 
---
Changes in v2:
  - Refactor and clean up source code

 MAINTAINERS   |   7 +
 drivers/gpu/drm/panel/Kconfig |  10 +
 drivers/gpu/drm/panel/Makefile|   1 +
 drivers/gpu/drm/panel/panel-novatek-nt36523.c | 832 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 850 insertions(+)
 create mode 100644 drivers/gpu/drm/panel/panel-novatek-nt36523.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 6b91bcbbc22f..3423c6a255ff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6538,6 +6538,13 @@ T:   git git://anongit.freedesktop.org/drm/drm-misc
 F: Documentation/devicetree/bindings/display/panel/sony,acx424akp.yaml
 F: drivers/gpu/drm/panel/panel-novatek-nt35560.c
 
+DRM DRIVER FOR NOVATEK NT36523 PANELS
+M: Jianhua Lu 
+S: Maintained
+T: git git://anongit.freedesktop.org/drm/drm-misc
+F: Documentation/devicetree/bindings/display/panel/novatek,nt36523.yaml
+F: drivers/gpu/drm/panel/panel-novatek-nt36523.c
+
 DRM DRIVER FOR NOVATEK NT36672A PANELS
 M: Sumit Semwal 
 S: Maintained
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 871c..268508743b5c 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -377,6 +377,16 @@ config DRM_PANEL_NOVATEK_NT35950
  Sharp panels used in Sony Xperia Z5 Premium and XZ Premium
  mobile phones.
 
+config DRM_PANEL_NOVATEK_NT36523
+   tristate "Novatek NT36523 panel driver"
+   depends on OF
+   depends on DRM_MIPI_DSI
+   depends on BACKLIGHT_CLASS_DEVICE
+   help
+ Say Y here if you want to enable support for the panels built
+ around the Novatek NT36523 display controller, such as some
+ Boe panels used in Xiaomi Mi Pad 5 and 5 Pro tablets.
+
 config DRM_PANEL_NOVATEK_NT36672A
tristate "Novatek NT36672A DSI panel"
depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index c05aa9e23907..570eab8bf2b2 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_DRM_PANEL_NEWVISION_NV3052C) += panel-newvision-nv3052c.o
 obj-$(CONFIG_DRM_PANEL_NOVATEK_NT35510) += panel-novatek-nt35510.o
 obj-$(CONFIG_DRM_PANEL_NOVATEK_NT35560) += panel-novatek-nt35560.o
 obj-$(CONFIG_DRM_PANEL_NOVATEK_NT35950) += panel-novatek-nt35950.o
+obj-$(CONFIG_DRM_PANEL_NOVATEK_NT36523) += panel-novatek-nt36523.o
 obj-$(CONFIG_DRM_PANEL_NOVATEK_NT36672A) += panel-novatek-nt36672a.o
 obj-$(CONFIG_DRM_PANEL_NOVATEK_NT39016) += panel-novatek-nt39016.o
 obj-$(CONFIG_DRM_PANEL_MANTIX_MLAF057WE51) += panel-mantix-mlaf057we51.o
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36523.c b/drivers/gpu/drm/panel/panel-novatek-nt36523.c
new file mode 100644
index 000000000000..1516e243d98e
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-novatek-nt36523.c
@@ -0,0 +1,832 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Novatek NT36523 DriverIC panels driver
+ * This driver is based on the source code of panel-boe-tv101wum-nl6.c
+ * and panel-novatek-nt35950.c.
+ *
+ * Copyright (c) 2022, 2023 Jianhua Lu 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define DSI_NUM_MIN 1
+
+struct panel_desc {
+   unsigned int width_mm;
+   unsigned int height_mm;
+
+   unsigned int bpc;
+   unsigned int lanes;
+   unsigned long mode_flags;
+   enum mipi_dsi_pixel_format format;
+
+   const struct drm_display_mode *modes;
+   const struct mipi_dsi_device_info dsi_info;
+   const struct panel_init_cmd *init_cmds;
+   bool is_dual_dsi;
+};
+
+struct panel_info {
+   struct drm_panel panel;
+   struct mipi_dsi_device *dsi[2];
+   const struct panel_desc *desc;
+
+   struct gpio_desc *reset_gpio;
+   struct backlight_device *backlight;
+   struct regulator_bulk_data supplies[3];
+
+   bool prepared;
+};
+
+enum dsi_cmd_type {
+   INIT_DCS_CMD,
+   DELAY_CMD,
+};
+
+struct panel_init_cmd {
+   enum dsi_cmd_type type;
+   size_t len;
+   const char *data;
+};
+
+#define _INIT_DCS_CMD(...) { \
+   .type = INIT_DCS_CMD, \
+   .len = sizeof((char[]){__VA_ARGS__}), \
+   .data = (char[]){__VA_ARGS__} }
+
+#define _INIT_DELAY_CMD(...) { \
+   .type = DELAY_CMD,\
+   .len = sizeof((char[]){__VA_ARGS__}), \
+   .data = (char[]){__VA_ARGS__} }
+
+static const char * const nt36523_regulator_names[] = {
+   "vddio",
+   "vddpos",
+   "vddneg",
+};
+
+static const unsigned long nt36523_regulator_enable_loads[] = {
+   62000,
+   100000,
+   100000
+};
+
+static const struct panel_init_cmd elish_boe_init_cmds[] = {
+   _INIT_DCS_CMD(0xFF, 0x10),
+   _INIT_DCS_CMD(0xFB, 0x01),
+   _INIT_DCS_CMD(0xB9, 0x05),
+   
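
For readers skimming the archive: a hedged sketch of how tables built
from the _INIT_DCS_CMD/_INIT_DELAY_CMD macros above are typically
replayed over DSI, in the style of panel-boe-tv101wum-nl6.c.
send_init_cmds() and the zero-length table terminator are illustrative
assumptions, not necessarily this driver's exact code:

static int send_init_cmds(struct mipi_dsi_device *dsi,
			  const struct panel_init_cmd *cmds)
{
	int i;

	/* Walk the table until a zero-length sentinel entry. */
	for (i = 0; cmds[i].len != 0; i++) {
		const struct panel_init_cmd *cmd = &cmds[i];
		ssize_t ret;

		if (cmd->type == DELAY_CMD) {
			/* For DELAY_CMD the first data byte is the delay in ms. */
			msleep(cmd->data[0]);
			continue;
		}

		ret = mipi_dsi_dcs_write_buffer(dsi, cmd->data, cmd->len);
		if (ret < 0)
			return ret;
	}

	return 0;
}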

[PATCH v2 1/2] dt-bindings: display: panel: Add Novatek NT36523 bindings

2023-02-20 Thread Jianhua Lu
Novatek NT36523 is a display driver IC used to drive DSI panels.

Signed-off-by: Jianhua Lu 
---
Changes in v2:
  - Drop unnecessary description
  - dsi0 -> dsi
  - Correct indentation

 .../display/panel/novatek,nt36523.yaml| 95 +++
 1 file changed, 95 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/display/panel/novatek,nt36523.yaml

diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt36523.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt36523.yaml
new file mode 100644
index ..544c3c6e568b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/novatek,nt36523.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/panel/novatek,nt36523.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Novatek NT36523 based DSI display Panels
+
+maintainers:
+  - Jianhua Lu 
+
+description: |
+  The Novatek NT36523 is a generic DSI panel IC used to drive DSI
+  panels. It supports video mode panels from China Star Optoelectronics
+  Technology (CSOT) and BOE Technology.
+
+allOf:
+  - $ref: panel-common.yaml#
+
+properties:
+  compatible:
+items:
+  - enum:
+  - xiaomi,elish-boe-nt36523
+  - xiaomi,elish-csot-nt36523
+  - const: novatek,nt36523
+
+  reset-gpios:
+maxItems: 1
+description: phandle of gpio for reset line - This should be 8mA
+
+  vddio-supply:
+description: regulator that supplies the I/O voltage
+
+  vddpos-supply:
+description: positive boost supply regulator
+
+  vddneg-supply:
+description: negative boost supply regulator
+
+  reg: true
+  ports: true
+  backlight: true
+
+required:
+  - compatible
+  - reg
+  - vddio-supply
+  - vddpos-supply
+  - vddneg-supply
+  - reset-gpios
+  - ports
+
+unevaluatedProperties: false
+
+examples:
+  - |
+#include 
+
+dsi {
+#address-cells = <1>;
+#size-cells = <0>;
+
+panel@0 {
+compatible = "xiaomi,elish-csot-nt36523", "novatek,nt36523";
+reg = <0>;
+vddio-supply = <&vreg_l14a_1p88>;
+vddpos-supply = <&vreg_vddpos_5p5>;
+vddneg-supply = <&vreg_vddneg_5p5>;
+
+backlight = <&backlight>;
+reset-gpios = <&tlmm 75 GPIO_ACTIVE_LOW>;
+
+ports {
+#address-cells = <1>;
+#size-cells = <0>;
+
+port@0 {
+reg = <0>;
+panel_in_0: endpoint {
+remote-endpoint = <&dsi0_out>;
+};
+};
+
+port@1{
+reg = <1>;
+panel_in_1: endpoint {
+remote-endpoint = <&dsi1_out>;
+};
+};
+};
+};
+};
+
+...
-- 
2.39.2



Re: [Intel-gfx] [RFC v2 0/5] Waitboost drm syncobj waits

2023-02-20 Thread Tvrtko Ursulin



On 17/02/2023 20:45, Rodrigo Vivi wrote:

On Fri, Feb 17, 2023 at 09:00:49AM -0800, Rob Clark wrote:

On Fri, Feb 17, 2023 at 8:03 AM Tvrtko Ursulin
 wrote:



On 17/02/2023 14:55, Rob Clark wrote:

On Fri, Feb 17, 2023 at 4:56 AM Tvrtko Ursulin
 wrote:



On 16/02/2023 18:19, Rodrigo Vivi wrote:

On Tue, Feb 14, 2023 at 11:14:00AM -0800, Rob Clark wrote:

On Fri, Feb 10, 2023 at 5:07 AM Tvrtko Ursulin
 wrote:


From: Tvrtko Ursulin 

In i915 we have this concept of "wait boosting" where we give a priority boost
for instance to fences which are actively waited upon from userspace. This has
its pros and cons and can certainly be discussed at length. However the fact is
some workloads really like it.

Problem is that with the arrival of drm syncobj and a new userspace waiting
entry point it added, the waitboost mechanism was bypassed. Hence I cooked up
this mini series really (really) quickly to see if some discussion can be had.

It adds a concept of "wait count" to dma fence, which is incremented for every
explicit dma_fence_enable_sw_signaling and dma_fence_add_wait_callback (like
dma_fence_add_callback but from explicit/userspace wait paths).


I was thinking about a similar thing, but in the context of dma_fence
(or rather sync_file) fd poll()ing.  How does the kernel differentiate
between "housekeeping" poll()ers that don't want to trigger boost but
simply know when to do cleanup, and waiters who are waiting with some
urgency.  I think we could use EPOLLPRI for this purpose.

Not sure how that translates to waits via the syncobj.  But I think we
want to let userspace give some hint about urgent vs housekeeping
waits.
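
To make the EPOLLPRI idea concrete, here is a userspace-side sketch; it
illustrates the proposal rather than existing uAPI, and the kernel would
still need to be taught to treat POLLPRI as a boost hint:

#include <poll.h>

/* fence_fd is a sync_file fd exported by the driver.  A plain POLLIN
 * poller would stay a "housekeeping" waiter; adding POLLPRI would flag
 * this wait as urgent and worth boosting for. */
static int wait_fence_urgent(int fence_fd, int timeout_ms)
{
	struct pollfd pfd = {
		.fd = fence_fd,
		.events = POLLIN | POLLPRI,
	};

	return poll(&pfd, 1, timeout_ms);
}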


Should the hint be on the waits, or should the hints be on the executed
context?

In the end we need some way to quickly ramp up the frequency to avoid
the execution bubbles.

waitboost is trying to guess that, but in some cases it guesses wrong
and wastes power.


Do we have a list of workloads which shows who benefits and who loses
from the current implementation of waitboost?

btw, this is something that other drivers might need:

https://gitlab.freedesktop.org/drm/amd/-/issues/1500#note_825883
Cc: Alex Deucher 


I have several issues with the context hint if it would directly
influence frequency selection in the "more power" direction.

First of all, assume a context hint would replace the waitboost. Which
applications would need to set it to restore the lost performance and
how would they set it?

Then I don't even think userspace necessarily knows. Think of a layer
like OpenCL. It doesn't really know in advance the profile of
submissions vs waits. It depends on the CPU vs GPU speed, so hardware
generation, and the actual size of the workload which can be influenced
by the application (or user) and not the library.

The approach also lends itself well for the "arms race" where every
application can say "Me me me, I am the most important workload there is!".


since there is discussion happening in two places:

https://gitlab.freedesktop.org/drm/intel/-/issues/8014#note_1777433

What I think you might want is a ctx boost_mask which lets an app or
driver disable certain boost signals/classes.  Where fence waits is
one class of boost, but hypothetical other signals like touchscreen
(or other) input events could be another class of boost.  A compute
workload might be interested in fence wait boosts but could care less
about input events.


I think only the apps have any chance of knowing whether their use of a
library is latency sensitive or not. That means new library extensions
and their adoption, so I have strong reservations that that route is
feasible.

Or we tie with priority which many drivers do. Normal and above gets the
boosting and what lowered itself does not (aka SCHED_IDLE/SCHED_BATCH).


yeah, that sounds reasonable.



on that gitlab-issue discussion Emma Anholt was against using the priority
to influence frequency since that should be more about latency.

or are we talking about a different kind of priority here?


As Rob already explained - I was suggesting skipping waitboost for 
contexts which explicitly made themselves low priority. I don't see a 
controversial angle there.



Related note is that we lack any external control of our scheduling
decisions so we really do suck compared to other scheduling domains like
CPU and IO etc.


My last concern is shared with the proposal to expose deadlines or high
priority waits as explicit uapi knobs. Both come under the "what the
application told us it will do" category vs what it actually does. So I
think it is slightly weaker than basing decisions on waits.

The current waitboost is a bit detached from that problem because when
we waitboost for flips we _know_ it is an actual framebuffer in the flip
chain. When we waitboost for waits we also know someone is waiting. We
are not trusting userspace telling us this will be a buffer in the flip
chain or that this is a context which will have a certain 

Re: linux-6.2-rc4+ hangs on poweroff/reboot: Bisected

2023-02-20 Thread Karol Herbst
On Mon, Feb 20, 2023 at 11:51 AM Chris Clayton  wrote:
>
>
>
> On 20/02/2023 05:35, Ben Skeggs wrote:
> > On Sun, 19 Feb 2023 at 04:55, Chris Clayton  
> > wrote:
> >>
> >>
> >>
> >> On 18/02/2023 15:19, Chris Clayton wrote:
> >>>
> >>>
> >>> On 18/02/2023 12:25, Karol Herbst wrote:
>  On Sat, Feb 18, 2023 at 1:22 PM Chris Clayton  
>  wrote:
> >
> >
> >
> > On 15/02/2023 11:09, Karol Herbst wrote:
> >> On Wed, Feb 15, 2023 at 11:36 AM Linux regression tracking #update
> >> (Thorsten Leemhuis)  wrote:
> >>>
> >>> On 13.02.23 10:14, Chris Clayton wrote:
>  On 13/02/2023 02:57, Dave Airlie wrote:
> > On Sun, 12 Feb 2023 at 00:43, Chris Clayton 
> >  wrote:
> >>
> >>
> >>
> >> On 10/02/2023 19:33, Linux regression tracking (Thorsten Leemhuis) 
> >> wrote:
> >>> On 10.02.23 20:01, Karol Herbst wrote:
>  On Fri, Feb 10, 2023 at 7:35 PM Linux regression tracking 
>  (Thorsten
>  Leemhuis)  wrote:
> >
> > On 08.02.23 09:48, Chris Clayton wrote:
> >>
> >> I'm assuming  that we are not going to see a fix for this 
> >> regression before 6.2 is released.
> >
> > Yeah, looks like it. That's unfortunate, but happens. But there 
> > is still
> > time to fix it and there is one thing I wonder:
> >
> > Did any of the nouveau developers look at the netconsole 
> > captures Chris
> > posted more than a week ago to check if they somehow help to 
> > track down
> > the root of this problem?
> 
>  I did now and I can't spot anything. I think at this point it 
>  would
>  make sense to dump the active tasks/threads via sysrq keys to 
>  see if
>  any is in a weird state preventing the machine from shutting 
>  down.
> >>>
> >>> Many thx for looking into it!
> >>
> >> Yes, thanks Karol.
> >>
> >> Attached is the output from dmesg when this block of code:
> >>
> >> /bin/mount /dev/sda7 /mnt/sda7
> >> /bin/mountpoint /proc || /bin/mount /proc
> >> /bin/dmesg -w > /mnt/sda7/sysrq.dmesg.log &
> >> /bin/echo t > /proc/sysrq-trigger
> >> /bin/sleep 1
> >> /bin/sync
> >> /bin/sleep 1
> >> kill $(pidof dmesg)
> >> /bin/umount /mnt/sda7
> >>
> >> is executed immediately before /sbin/reboot is called as the final 
> >> step of rebooting my system.
> >>
> >> I hope this is what you were looking for, but if not, please let 
> >> me know what you need
> 
>  Thanks Dave. [...]
> >>> FWIW, in case anyone strands here in the archives: the msg was
> >>> truncated. The full post can be found in a new thread:
> >>>
> >>> https://lore.kernel.org/lkml/e0b80506-b3cf-315b-4327-1b988d860...@googlemail.com/
> >>>
> >>> Sadly it seems the info "With runpm=0, both reboot and poweroff work 
> >>> on
> >>> my laptop." didn't bring us much further to a solution. :-/ I don't
> >>> really like it, but for regression tracking I'm now putting this on 
> >>> the
> >>> back-burner, as a fix is not in sight.
> >>>
> >>> #regzbot monitor:
> >>> https://lore.kernel.org/lkml/e0b80506-b3cf-315b-4327-1b988d860...@googlemail.com/
> >>> #regzbot backburner: hard to debug and apparently rare
> >>> #regzbot ignore-activity
> >>>
> >>
> >> yeah.. this bug looks a little annoying. Sadly the only Turing based
> >> laptop I got doesn't work on Nouveau because of firmware related
> >> issues and we probably need to get updated ones from Nvidia here :(
> >>
> >> But it's a bit weird that the kernel doesn't shutdown, because I don't
> >> see anything in the logs which would prevent that from happening.
> >> Unless it's waiting on one of the tasks to complete, but none of them
> >> looked in any way nouveau related.
> >>
> >> If somebody else has any fancy kernel debugging tips here to figure
> >> out why it hangs, that would be very helpful...
> >>
> >
> > I think I've figured this out. It's to do with how my system is 
> > configured. I do have an initrd, but the only thing on
> > it is the cpu microcode which, it is recommended, should be loaded 
> > early. The absence of the NVidia firmware from an
> > initrd doesn't matter because the drivers for the hardware that need to 
> > load firmware are all built as modules. So, by
> > the time the devices are configured via udev, the root partition is 
> > mounted and the drivers can get at the firmware.
> >
> 

Re: linux-6.2-rc4+ hangs on poweroff/reboot: Bisected

2023-02-20 Thread Chris Clayton



On 20/02/2023 05:35, Ben Skeggs wrote:
> On Sun, 19 Feb 2023 at 04:55, Chris Clayton  wrote:
>>
>>
>>
>> On 18/02/2023 15:19, Chris Clayton wrote:
>>>
>>>
>>> On 18/02/2023 12:25, Karol Herbst wrote:
 On Sat, Feb 18, 2023 at 1:22 PM Chris Clayton  
 wrote:
>
>
>
> On 15/02/2023 11:09, Karol Herbst wrote:
>> On Wed, Feb 15, 2023 at 11:36 AM Linux regression tracking #update
>> (Thorsten Leemhuis)  wrote:
>>>
>>> On 13.02.23 10:14, Chris Clayton wrote:
 On 13/02/2023 02:57, Dave Airlie wrote:
> On Sun, 12 Feb 2023 at 00:43, Chris Clayton 
>  wrote:
>>
>>
>>
>> On 10/02/2023 19:33, Linux regression tracking (Thorsten Leemhuis) 
>> wrote:
>>> On 10.02.23 20:01, Karol Herbst wrote:
 On Fri, Feb 10, 2023 at 7:35 PM Linux regression tracking (Thorsten
 Leemhuis)  wrote:
>
> On 08.02.23 09:48, Chris Clayton wrote:
>>
>> I'm assuming  that we are not going to see a fix for this 
>> regression before 6.2 is released.
>
> Yeah, looks like it. That's unfortunate, but happens. But there 
> is still
> time to fix it and there is one thing I wonder:
>
> Did any of the nouveau developers look at the netconsole captures 
> Chris
> posted more than a week ago to check if they somehow help to 
> track down
> the root of this problem?

 I did now and I can't spot anything. I think at this point it would
 make sense to dump the active tasks/threads via sysrq keys to see 
 if
 any is in a weird state preventing the machine from shutting down.
>>>
>>> Many thx for looking into it!
>>
>> Yes, thanks Karol.
>>
>> Attached is the output from dmesg when this block of code:
>>
>> /bin/mount /dev/sda7 /mnt/sda7
>> /bin/mountpoint /proc || /bin/mount /proc
>> /bin/dmesg -w > /mnt/sda7/sysrq.dmesg.log &
>> /bin/echo t > /proc/sysrq-trigger
>> /bin/sleep 1
>> /bin/sync
>> /bin/sleep 1
>> kill $(pidof dmesg)
>> /bin/umount /mnt/sda7
>>
>> is executed immediately before /sbin/reboot is called as the final 
>> step of rebooting my system.
>>
>> I hope this is what you were looking for, but if not, please let me 
>> know what you need

 Thanks Dave. [...]
>>> FWIW, in case anyone strands here in the archives: the msg was
>>> truncated. The full post can be found in a new thread:
>>>
>>> https://lore.kernel.org/lkml/e0b80506-b3cf-315b-4327-1b988d860...@googlemail.com/
>>>
>>> Sadly it seems the info "With runpm=0, both reboot and poweroff work on
>>> my laptop." didn't bring us much further to a solution. :-/ I don't
>>> really like it, but for regression tracking I'm now putting this on the
>>> back-burner, as a fix is not in sight.
>>>
>>> #regzbot monitor:
>>> https://lore.kernel.org/lkml/e0b80506-b3cf-315b-4327-1b988d860...@googlemail.com/
>>> #regzbot backburner: hard to debug and apparently rare
>>> #regzbot ignore-activity
>>>
>>
>> yeah.. this bug looks a little annoying. Sadly the only Turing based
>> laptop I got doesn't work on Nouveau because of firmware related
>> issues and we probably need to get updated ones from Nvidia here :(
>>
>> But it's a bit weird that the kernel doesn't shutdown, because I don't
>> see anything in the logs which would prevent that from happening.
>> Unless it's waiting on one of the tasks to complete, but none of them
>> looked in any way nouveau related.
>>
>> If somebody else has any fancy kernel debugging tips here to figure
>> out why it hangs, that would be very helpful...
>>
>
> I think I've figured this out. It's to do with how my system is 
> configured. I do have an initrd, but the only thing on
> it is the cpu microcode which, it is recommended, should be loaded early. 
> The absence of the NVidia firmware from an
> initrd doesn't matter because the drivers for the hardware that need to 
> load firmware are all built as modules. So, by
> the time the devices are configured via udev, the root partition is 
> mounted and the drivers can get at the firmware.
>
> I've found, by turning on nouveau debug and taking a video of the screen 
> as the system shuts down, that nouveau seems to
> be trying to run the scrubber very very late in the shutdown process. The 
> problem is that by this time, I think the root
> partition, and thus the scrubber binary, have become inaccessible.
>
> I seem to 

Re: [PATCH v2 06/14] drm/msm/gpu: Use dev_pm_opp_set_rate for non-GMU GPUs

2023-02-20 Thread Dmitry Baryshkov
On Mon, 20 Feb 2023 at 11:59, Konrad Dybcio  wrote:
> On 18.02.2023 17:47, Dmitry Baryshkov wrote:
> > On 18/02/2023 13:04, Konrad Dybcio wrote:
> >> On 17.02.2023 22:07, Dmitry Baryshkov wrote:
> >>> On 14/02/2023 19:31, Konrad Dybcio wrote:
>  Currently we only utilize the OPP table connected to the GPU for
>  getting (available) frequencies. We do however need to scale the
>  voltage rail(s) accordingly to ensure that we aren't trying to
>  run the GPU at 1GHz with a VDD_LOW vote, as that would result in
>  an otherwise inexplainable hang.
> 
>  Tell the OPP framework that we want to scale the "core" clock
>  and swap out the clk_set_rate to a dev_pm_opp_set_rate in
>  msm_devfreq_target() to enable usage of required-opps and by
>  extension proper voltage level/corner scaling.
> 
>  Signed-off-by: Konrad Dybcio 
>  ---
> drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 ++++
> drivers/gpu/drm/msm/msm_gpu_devfreq.c   | 2 +-
> 2 files changed, 5 insertions(+), 1 deletion(-)
> 
>  diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
>  index ce6b76c45b6f..15e405e4f977 100644
>  --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
>  +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
>  @@ -1047,6 +1047,10 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
> const char *gpu_name;
> u32 speedbin;
>  +	/* This can only be done here, or devm_pm_opp_set_supported_hw will WARN_ON() */
>  +	if (!IS_ERR(devm_clk_get(dev, "core")))
>  +		devm_pm_opp_set_clkname(dev, "core");
> >>>
> >>> Can we instead move a call to a6xx_set_supported_hw() / check_speed_bin 
> >>> after the adreno_gpu_init() ? It will call msm_gpu_init, which in turn 
> >>> sets gpu->core_clk.
> >>>
> >>> Ideally you can call devm_pm_opp_set_clkname() from that function.
> >>
> >>
> >>> Or maybe completely drop gpu->core_clk and always use 
> >>> devm_pm_opp_set_clk_rate().
> >> That would break non-OPP targets, last of which were probably added N=big 
> >> years ago..
> >
> > No. In the absence of OPP tables, dev_pm_opp_clk_set_rate() should behave 
> > exactly like clk_set_rate().
> Not sure if that's what you meant, but if a device lacks OPP,
> devm_pm_opp_set_rate will return -ENODEV.
>
> If you meant "if we can't find an opp table, behave as if we
> called clk_set_rate", a discussion on #freedreno with robclark
> indicates he'd accept getting rid of non-opp code, provided we
> construct a table if need be, since we have the data required
> to do so ([FMIN=27MHz, FMAX=fast_rate]).

I was referring to a comment at dev_pm_opp_set_rate():

/*
* For IO devices which require an OPP on some platforms/SoCs
* while just needing to scale the clock on some others
* we look for empty OPP tables with just a clock handle and
* scale only the clk. This makes dev_pm_opp_set_rate()
* equivalent to a clk_set_rate()
*/

Maybe we just need to make sure that the OPP table exists
(devm_pm_opp_of_add_table) to prevent the function from bailing out
early.
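
A hedged sketch of that direction; adreno_init_opp() is a made-up
helper name and error handling is trimmed:

/* Make sure dev_pm_opp_set_rate() always has a table to work with:
 * use the DT table when present, otherwise register a single
 * fallback OPP so it degrades to a plain clk_set_rate(). */
static int adreno_init_opp(struct device *dev, unsigned long fmax)
{
	int ret;

	ret = devm_pm_opp_set_clkname(dev, "core");
	if (ret)
		return ret;

	ret = devm_pm_opp_of_add_table(dev);
	if (ret == -ENODEV)
		ret = dev_pm_opp_add(dev, fmax, 0); /* freq in Hz, 0 uV */

	return ret;
}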

>
> >
> >> I'm not sure these would still work, as I think we've got rid of some ugly
> >> clock getters that were looking for both "core" and "core_clk" etc.
> >
> > We still support core vs core_clk, see the get_clocks() at msm_gpu.c and 
> > then msm_clk_bulk_get_clock(). However we might mimic this function and 
> > call devm_pm_opp_set_clkname() with the proper name ("core" or "core_clk").
> >
> >>
> >> See 8db0b6c7b636376789e356d861c3c6c35dcb6913 for what seems to be the most 
> >> recent
> >> example of non-OPP.
> >>
> >> IMX51/53 also have no OPP tables and are using the (AFAIK) now-defunct 
> >> _clk-suffixed
> >> clock-names.
> >
> > It works, I tested it during this cycle.
> Oh okay, I had a feeling like that was dropped at one point..
>
> >
> >>
> >> I'd be more than happy to rip out some of this legacy code and convert it
> >> to something modern like OPP, but I'm not sure you guys would like it 
> >> considering
> >> the breakage on (arguably ancient and borderline retired) platforms.
> >
> > I think, we should try switching to OPP-for-everybody, granted the promise 
> > of dev_pm_opp_set_clk_rate() being backwards compatible with bare 
> > clk_set_rate().
> It's not, but as I mentioned, we can easily work around that.
>
> >
> >>
> >> This patch as-is "only" breaks non-OPP a5xx & a6xx (as they have .gpu_busy 
> >> defined),
> >> of which there are none..
> ...but we want to get devfreq everywhere and it's a few LoC away..
>
> Konrad
> >>
> >>>
>  +
> adreno_gpu->funcs = funcs;
> adreno_gpu->info = adreno_info(config->rev);
> adreno_gpu->gmem = adreno_gpu->info->gmem;
>  diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
>  index e27dbf12b5e8..ea70c1c32d94 100644
>  --- 

Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible

2023-02-20 Thread Konrad Dybcio



On 20.02.2023 11:31, Krzysztof Kozlowski wrote:
> On 20/02/2023 11:24, Konrad Dybcio wrote:
>>
>>
>> On 18.02.2023 15:49, Krzysztof Kozlowski wrote:
>>> On 18/02/2023 12:23, Konrad Dybcio wrote:


 On 18.02.2023 11:14, Krzysztof Kozlowski wrote:
> On 17/02/2023 22:13, Bryan O'Donoghue wrote:
>> On 17/02/2023 12:24, Krzysztof Kozlowski wrote:
>>> First, it would be nice to know what was the intention of Bryan's 
>>> commit?
>>
>> Sorry I've been grazing this thread but, not responding.
>>
>> - qcom,dsi-ctrl-6g-qcm2290
>>
>> is non-compliant with qcom,socid-dsi-ctrl which is our desired naming 
>> convention, so that's what the deprecation is about i.e. moving this 
>> compat to "qcom,qcm2290-dsi-ctrl"
>
> OK, then there was no intention to deprecate qcom,mdss-dsi-ctrl and it
> should be left as allowed compatible.
 Not sure if we're on the same page.
>>>
>>> We are.
>>>

 It wasn't intended to deprecate [1] "qcom,qcm2290-dsi-ctrl", 
 "qcom-mdss-dsi-ctrl";
 (newly-introduced in Bryan's cleanup patchset) but it was intended to 
 deprecate
 [2] "qcom,dsi-ctrl-6g-qcm2290"; which was introduced long before that 
 *and* used in
 the 6115 dt (and it still is in linux-next today, as my cleanup hasn't 
 landed yet).

 [3] "qcom,dsi-ctrl-6g-qcm2290", "qcom,mdss-dsi-ctrl" was never used (and 
 should never
 be, considering there's a proper compatible [1] now) so adding it to 
 bindings
 didn't solve the undocumented-ness issue. Plus the fallback would have 
 never
 worked back then, as the DSI hw revision check would spit out 2.4.1 or 2.4.
 which is SC7180 or SDM845 and then it would never match the base register, 
 as
 they're waay different.
>>>
>>> All these were known. I was asking about "qcom,mdss-dsi-ctrl", because
>>> the original intention also affects the way we want to keep it now
>>> (unless there are other reasons).
>> Okay, so we want to deprecate:
>>
>> "qcom,dsi-ctrl-6g-qcm2290", "qcom,mdss-dsi-ctrl"
> 
> No, we don't want to deprecate it. Such a compatible never existed
> originally and was only introduced by mistake. We want to correct the
> mistake, but we don't want to deprecate such a list.
> 
>>
>> because it is:
>>
>> 1) non-compliant with the qcom,socname-hwblock formula
>> 2) replaceable since we rely on the fallback compatible
>> 3) "qcom,dsi-ctrl-6g-qcm2290" alone would have been expected to
>>be fixed in the DTSI similar to other SoCs
>>
>> Is that correct?
> 
> No. So again, I am talking only about qcom,mdss-dsi-ctrl. Since
> beginning of this thread:
> 
> "Wasn't then intention to deprecate both - qcm2290 and mdss - when used
> alone?"
> 
> Why do you bring the list to the topic? The list was created by mistake
> and Bryan confirmed that it was never his intention.
Ugh.. I think I just misread your message in your second reply
counting from the beginning of the thread.. Things are much
clearer now that I re-read it..

So, just to confirm..

This patch, with the items: level dropped, is fine?

Konrad
> 
>>
>> Because 2) doesn't hold, as - at the time of the introduction
>> of Bryan's patchset - the fallback compatible would not have
>> been sufficient from the Linux POV [1]
> 
> There was no fallback compatible at that time.
> 
>> , though it would have been
>> sufficient from the hardware description POV, as the hardware
>> on the SoC *is* essentially what qcom,mdss-dsi-ctrl refers to.
>>
>> [1] The driver would simply not probe. It *would be* Linux-correct
>> after my code-fixing series was applied, but I think I'm just failing
>> to comprehend what sort of ABI we're trying to preserve here :/
> 
> Best regards,
> Krzysztof
> 


Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible

2023-02-20 Thread Krzysztof Kozlowski
On 20/02/2023 11:24, Konrad Dybcio wrote:
> 
> 
> On 18.02.2023 15:49, Krzysztof Kozlowski wrote:
>> On 18/02/2023 12:23, Konrad Dybcio wrote:
>>>
>>>
>>> On 18.02.2023 11:14, Krzysztof Kozlowski wrote:
 On 17/02/2023 22:13, Bryan O'Donoghue wrote:
> On 17/02/2023 12:24, Krzysztof Kozlowski wrote:
>> First, it would be nice to know what was the intention of Bryan's commit?
>
> Sorry I've been grazing this thread but, not responding.
>
> - qcom,dsi-ctrl-6g-qcm2290
>
> is non-compliant with qcom,socid-dsi-ctrl which is our desired naming 
> convention, so that's what the deprecation is about i.e. moving this 
> compat to "qcom,qcm2290-dsi-ctrl"

 OK, then there was no intention to deprecate qcom,mdss-dsi-ctrl and it
 should be left as allowed compatible.
>>> Not sure if we're on the same page.
>>
>> We are.
>>
>>>
>>> It wasn't intended to deprecate [1] "qcom,qcm2290-dsi-ctrl", 
>>> "qcom-mdss-dsi-ctrl";
>>> (newly-introduced in Bryan's cleanup patchset) but it was intended to 
>>> deprecate
>>> [2] "qcom,dsi-ctrl-6g-qcm2290"; which was introduced long before that *and* 
>>> used in
>>> the 6115 dt (and it still is in linux-next today, as my cleanup hasn't 
>>> landed yet).
>>>
>>> [3] "qcom,dsi-ctrl-6g-qcm2290", "qcom,mdss-dsi-ctrl" was never used (and 
>>> should never
>>> be, considering there's a proper compatible [1] now) so adding it to 
>>> bindings
>>> didn't solve the undocumented-ness issue. Plus the fallback would have never
>>> worked back then, as the DSI hw revision check would spit out 2.4.1 or 2.4.
>>> which is SC7180 or SDM845 and then it would never match the base register, 
>>> as
>>> they're waay different.
>>
>> All these were known. I was asking about "qcom,mdss-dsi-ctrl", because
>> the original intention also affects the way we want to keep it now
>> (unless there are other reasons).
> Okay, so we want to deprecate:
> 
> "qcom,dsi-ctrl-6g-qcm2290", "qcom,mdss-dsi-ctrl"

No, we don't want to deprecate it. Such a compatible never existed
originally and was only introduced by mistake. We want to correct the
mistake, but we don't want to deprecate such a list.

> 
> because it is:
> 
> 1) non-compliant with the qcom,socname-hwblock formula
> 2) replaceable since we rely on the fallback compatible
> 3) "qcom,dsi-ctrl-6g-qcm2290" alone would have been expected to
>be fixed in the DTSI similar to other SoCs
> 
> Is that correct?

No. So again, I am talking only about qcom,mdss-dsi-ctrl. Since
beginning of this thread:

"Wasn't then intention to deprecate both - qcm2290 and mdss - when used
alone?"

Why do you bring the list to the topic? The list was created by mistake
and Bryan confirmed that it was never his intention.

> 
> Because 2) doesn't hold, as - at the time of the introduction
> of Bryan's patchset - the fallback compatible would not have
> been sufficient from the Linux POV [1]

There was no fallback compatible at that time.

> , though it would have been
> sufficient from the hardware description POV, as the hardware
> on the SoC *is* essentially what qcom,mdss-dsi-ctrl refers to.
> 
> [1] The driver would simply not probe. It *would be* Linux-correct
> after my code-fixing series was applied, but I think I'm just failing
> to comprehend what sort of ABI we're trying to preserve here :/

Best regards,
Krzysztof



Re: [PATCH 1/1] drm/panel: st7703: Fix vertical refresh rate of XBD599

2023-02-20 Thread Frank Oltmanns
Hi Ondřej,
hi all,

Ondřej Jirman  writes:
> On Sun, Feb 19, 2023 at 12:45:53PM +0100, Frank Oltmanns wrote:
>> Fix the XBD599 panel’s slight visual stutter by correcting the pixel
>> clock speed so that the panel’s 60Hz vertical refresh rate is met.
>>
>> Set the clock speed using the underlying formula instead of a magic
>> number. To have a consistent procedure for both panels, set the JH057N
>> panel’s clock also as a formula.
>>
>> ---
>>  drivers/gpu/drm/panel/panel-sitronix-st7703.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7703.c b/drivers/gpu/drm/panel/panel-sitronix-st7703.c
>> index 6747ca237ced..cd7d631f7573 100644
>> --- a/drivers/gpu/drm/panel/panel-sitronix-st7703.c
>> +++ b/drivers/gpu/drm/panel/panel-sitronix-st7703.c
>> @@ -139,7 +139,7 @@ static const struct drm_display_mode jh057n00900_mode = {
>>  .vsync_start = 1440 + 20,
>>  .vsync_end   = 1440 + 20 + 4,
>>  .vtotal  = 1440 + 20 + 4 + 12,
>> -.clock   = 75276,
>> +.clock   = (720 + 90 + 20 + 20) * (1440 + 20 + 4 + 12) * 60 / 1000,
>>  .flags   = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
>>  .width_mm= 65,
>>  .height_mm   = 130,
>> @@ -324,7 +324,7 @@ static const struct drm_display_mode xbd599_mode = {
>>  .vsync_start = 1440 + 18,
>>  .vsync_end   = 1440 + 18 + 10,
>>  .vtotal  = 1440 + 18 + 10 + 17,
>> -.clock   = 69000,
>> +.clock   = (720 + 40 + 40 + 40) * (1440 + 18 + 10 + 17) * 60 / 1000,
>
> As for the PinePhone, the A64 can’t produce 74.844 MHz precisely, so this
> will not work.
>
> A better fix is to alter the mode so that the clock can be something the
> only SoC this panel is used with can actually produce.
>
> See eg. 
> 
> which is tested to actually produce 60Hz by measuring the vsync events against
> the CPU timer.
>
> Your patch will not produce the intended effect.
>
> kind regards,
>   o.
>

The TL;DR of my upcoming musings is: Thank you very much for your feedback! Any
recommendations for an informative read about the topic that you or anybody else
has are greatly appreciated.

How did you measure the vsync events? Were you using vblank interrupts [1]?

I have to admit that I tested only visually and couldn’t spot a difference
between your patch and mine. I’ll need to put more thinking into this, and maybe
you or anyone reading this can help me with that.

My interpretation of the `struct drm_display_mode` documentation [2] was, that
these are logical dimensions/clocks that somewhere down the stack are converted
to their physical/hardware representation.

But now I’ve read the description of the struct’s “crtc_clock” member more
carefully. It says:
“Actual pixel or dot clock in the hardware. This differs from the
logical @clock when e.g. using interlacing, double-clocking, stereo
modes or other fancy stuff that changes the timings and signals
actually sent over the wire.”

So, can I say that if we don’t use “interlacing, double-clocking, stereo modes
or other fancy stuff” that `crtc_clock` will be equal to `clock` and therefore
we have to choose `clock` according to the SoC’s capabilities?
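
For concreteness, the arithmetic behind the xbd599 change above (my
numbers, not from the thread): htotal = 720 + 40 + 40 + 40 = 840 and
vtotal = 1440 + 18 + 10 + 17 = 1485, so a 60 Hz refresh needs
840 * 1485 * 60 = 74,844,000 Hz, i.e. the 74.844 MHz figure Ondřej
mentions. That is why the mode itself, not just the clock field, has to
change if the SoC cannot generate that rate exactly.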

Also, I haven’t found a source about which values to use for the front and back
porch part of the panel and why you can just “arbitrarily” change those. My
assumption is that those are just extra pixels we can add to make the
dimensions match the ratio of clock and vertical refresh rate. At least that
seems to be what you did in your patch. But again, I have no source to back my
assumption about the range the porches can have.

I’ve put the following docs on my “to read and understand” list:
• Allwinner A64 User Manual (to learn more about the SoC’s TCON0 and what
  clocks the SoC can produce)
• drm-internals.rst
• “Rendering PinePhone’s display” [3], to learn why it produces 69 MHz.
• Your commit message for the PinePhone Pro panel [4] (found on your blog:
  )

Is there anything else I should add?

Thank you again and best regards,
  Frank

[1] 

[2] 

[3] 
[4] 


>>  .flags   = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
>>  .width_mm= 68,
>>  .height_mm   = 136,
>> --
>> 2.39.1
>>


Re: [PATCH v2 1/2] dt-bindings: display/msm: dsi-controller-main: Fix deprecated QCM2290 compatible

2023-02-20 Thread Konrad Dybcio



On 18.02.2023 15:49, Krzysztof Kozlowski wrote:
> On 18/02/2023 12:23, Konrad Dybcio wrote:
>>
>>
>> On 18.02.2023 11:14, Krzysztof Kozlowski wrote:
>>> On 17/02/2023 22:13, Bryan O'Donoghue wrote:
 On 17/02/2023 12:24, Krzysztof Kozlowski wrote:
> First, it would be nice to know what was the intention of Bryan's commit?

 Sorry I've been grazing this thread but, not responding.

 - qcom,dsi-ctrl-6g-qcm2290

 is non-compliant with qcom,socid-dsi-ctrl which is our desired naming 
 convention, so that's what the deprecation is about i.e. moving this 
 compat to "qcom,qcm2290-dsi-ctrl"
>>>
>>> OK, then there was no intention to deprecate qcom,mdss-dsi-ctrl and it
>>> should be left as allowed compatible.
>> Not sure if we're on the same page.
> 
> We are.
> 
>>
>> It wasn't intended to deprecate [1] "qcom,qcm2290-dsi-ctrl", 
>> "qcom-mdss-dsi-ctrl";
>> (newly-introduced in Bryan's cleanup patchset) but it was intended to 
>> deprecate
>> [2] "qcom,dsi-ctrl-6g-qcm2290"; which was introduced long before that *and* 
>> used in
>> the 6115 dt (and it still is in linux-next today, as my cleanup hasn't 
>> landed yet).
>>
>> [3] "qcom,dsi-ctrl-6g-qcm2290", "qcom,mdss-dsi-ctrl" was never used (and 
>> should never
>> be, considering there's a proper compatible [1] now) so adding it to bindings
>> didn't solve the undocumented-ness issue. Plus the fallback would have never
>> worked back then, as the DSI hw revision check would spit out 2.4.1 or 2.4.
>> which is SC7180 or SDM845 and then it would never match the base register, as
>> they're waay different.
> 
> All these were known. I was asking about "qcom,mdss-dsi-ctrl", because
> the original intention also affects the way we want to keep it now
> (unless there are other reasons).
Okay, so we want to deprecate:

"qcom,dsi-ctrl-6g-qcm2290", "qcom,mdss-dsi-ctrl"

because it is:

1) non-compliant with the qcom,socname-hwblock formula
2) replaceable since we rely on the fallback compatible
3) "qcom,dsi-ctrl-6g-qcm2290" alone would have been expected to
   be fixed in the DTSI similar to other SoCs

Is that correct?

Because 2) doesn't hold, as - at the time of the introduction
of Bryan's patchset - the fallback compatible would not have
been sufficient from the Linux POV [1], though it would have been
sufficient from the hardware description POV, as the hardware
on the SoC *is* essentially what qcom,mdss-dsi-ctrl refers to.

[1] The driver would simply not probe. It *would be* Linux-correct
after my code-fixing series was applied, but I think I'm just failing
to comprehend what sort of ABI we're trying to preserve here :/

Konrad

> 
> Best regards,
> Krzysztof
> 


Re: [PATCH] drm/gem: Expose the buffer object handle to userspace last

2023-02-20 Thread Tvrtko Ursulin



On 20/02/2023 10:01, Christian König wrote:

On 20.02.23 at 10:55, Tvrtko Ursulin wrote:


Hi,

On 14/02/2023 13:59, Christian König wrote:

On 14.02.23 at 13:50, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Currently drm_gem_handle_create_tail exposes the handle to userspace
before the buffer object construction is complete. Allowing a partially
constructed object to be worked against, while it may also be in the
process of having its creation fail, can have a range of negative
outcomes.

A lot of those will depend on what the individual drivers are doing in
their obj->funcs->open() callbacks, and also with a common failure mode
being -ENOMEM from drm_vma_node_allow.

We can make sure none of this can happen by allocating a handle last,
although with a downside that more of the function now runs under the
dev->object_name_lock.

Looking into the individual drivers' open() hooks, we have
amdgpu_gem_object_open which seems like it could have a potential
security issue without this change.

A couple of drivers like qxl_gem_object_open and vmw_gem_object_open
implement no-op hooks so there is no impact for them.

A bunch of others require a deeper look by individual owners to assess
the impact. Those are lima_gem_object_open, nouveau_gem_object_open,
panfrost_gem_open, radeon_gem_object_open and virtio_gpu_gem_object_open.


Putting aside the risk assessment of the above, some common scenarios to
think about are along these lines:

1)
Userspace closes a handle by speculatively "guessing" it from a second
thread.

This results in an unreachable buffer object, so a memory leak.

2)
Same as 1), but the object is in the process of getting closed (failed
creation).

The second thread is then able to re-cycle the handle, and idr_remove in
the first thread would then remove a handle it does not own from the
idr.

3)
Going back to the earlier per-driver problem space - individual impact
assessment of allowing a second thread to access and operate on a
partially constructed handle / object. (Can something crash? Leak
information?)
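
A toy illustration of scenarios 1/2, assuming libdrm's drmIoctl();
purely to make the race concrete:

#include <pthread.h>
#include <xf86drm.h>

/* Thread B speculatively "guesses" handle 1 while thread A is still in
 * the middle of creating its object.  With the handle published before
 * construction completes, B can close A's half-built object
 * (scenario 1), or re-cycle the handle during a failed creation so
 * A's idr_remove() later hits a handle it no longer owns (scenario 2). */
static void *guess_and_close(void *arg)
{
	int fd = *(int *)arg;
	struct drm_gem_close args = { .handle = 1 }; /* guessed handle */

	drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &args);
	return NULL;
}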

In terms of identifying when the problem started I will tag some
patches as references, but not all, if even any, of them actually point
to a broken state. I am just identifying points at which more
opportunity for issues to arise was added.


Yes I've looked into this once as well, but couldn't completely solve
it for some reason.

Give me a day or two to get this tested and all the logic swapped back
into my head again.


Managed to recollect what the problem with earlier attempts was?


Nope, that's way too long ago. I can only assume that I ran into
problems with the object_name_lock.


Probably best to double check if that doesn't result in a lock inversion 
when somebody grabs the reservation lock in their ->load() callback.


Hmm I don't immediately follow the connection. But I have only found 
radeon_driver_load_kms as using the load callback. Is there any lockdep 
enabled CI for that driver which could tell us if there is a problem there?


Regards,

Tvrtko



Regards,
Christian.



Regards,

Tvrtko


Christian.



References: 304eda32920b ("drm/gem: add hooks to notify driver when 
object handle is created/destroyed")

References: ca481c9b2a3a ("drm/gem: implement vma access management")
References: b39b5394fabc ("drm/gem: Add drm_gem_object_funcs")
Cc: dri-devel@lists.freedesktop.org
Cc: Rob Clark 
Cc: Ben Skeggs 
Cc: David Herrmann 
Cc: Noralf Trønnes 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: amd-...@lists.freedesktop.org
Cc: l...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
Cc: Steven Price 
Cc: virtualizat...@lists.linux-foundation.org
Cc: spice-de...@lists.freedesktop.org
Cc: Zack Rusin 
---
  drivers/gpu/drm/drm_gem.c | 48 ++++++++++++++++++++++++------------------------
  1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index aa15c52ae182..e3d897bca0f2 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -356,52 +356,52 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
 			   u32 *handlep)
 {
 	struct drm_device *dev = obj->dev;
-	u32 handle;
 	int ret;
 
 	WARN_ON(!mutex_is_locked(&dev->object_name_lock));
 	if (obj->handle_count++ == 0)
 		drm_gem_object_get(obj);
 
+	ret = drm_vma_node_allow(&obj->vma_node, file_priv);
+	if (ret)
+		goto err_put;
+
+	if (obj->funcs->open) {
+		ret = obj->funcs->open(obj, file_priv);
+		if (ret)
+			goto err_revoke;
+	}
+
 	/*
-	 * Get the user-visible handle using idr.  Preload and perform
-	 * allocation under our spinlock.
+	 * Get the user-visible handle using idr as the _last_ step.
+	 * Preload and perform allocation under our spinlock.
 	 */
 	idr_preload(GFP_KERNEL);
 	spin_lock(&file_priv->table_lock);
 	ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
 	spin_unlock(&file_priv->table_lock);
 	idr_preload_end();
-	

Re: [PATCH v4 0/6] drm: lcdif: Add i.MX93 LCDIF support

2023-02-20 Thread Alexander Stein
Hi Liu,

On Monday, 20 February 2023 at 09:55:19 CET, Alexander Stein wrote:
> Hi Liu,
> 
> On Friday, 17 February 2023 at 09:59:14 CET, Liu Ying wrote:
> > On Fri, 2023-02-17 at 09:18 +0100, Alexander Stein wrote:
> > > Hi Liu,
> > 
> > Hi Alexander,
> > 
> > > On Friday, 17 February 2023 at 07:54:01 CET, Liu Ying wrote:
> > > > Hi,
> > > > 
> > > > This patch set aims to add i.MX93 LCDIF display controller support
> > > > in the existing LCDIF DRM driver.  The LCDIF embedded in i.MX93 SoC
> > > > is essentially the same as those embedded in i.MX8mp SoC.  Through
> > > > internal bridges, i.MX93 LCDIF may drive a MIPI DSI display or a LVDS
> > > > display or a parallel display.
> > > > 
> > > > Patch 1/6 adds device tree binding support for i.MX93 LCDIF in the
> > > > existing fsl,lcdif.yaml.
> > > > 
> > > > Patch 2/6 drops lcdif->bridge NULL pointer check as a cleanup patch.
> > > > 
> > > > Patch 3/6~5/6 prepare for adding i.MX93 LCDIF support step by step.
> > > > 
> > > > Patch 6/6 adds i.MX93 LCDIF compatible string as the last step of
> > > > adding i.MX93 LCDIF support.
> > > 
> > > Thanks for the series. I could test this on my TQMa93xxLA/MBa93xxCA with
> > > a
> > > single LVDS display attached, so no DSI or parallel display. Hence I
> > > could
> > > not test the bus format and flags checks, but they look okay.
> > > So you can add
> > > Tested-by: Alexander Stein 
> > > to the whole series as well.
> > 
> > Thanks for your test.
> > 
> > > One thing I noticed is that sometimes it seems that before probing
> > > lcdif my system completely freezes. Adding some debug output, it seems
> > > that during powering up the IMX93_MEDIABLK_PD_LCDIF power domain there
> > > is some race condition. But adding more detailed output made the
> > > problem go away. Did you notice something similar? It might be a red
> > > herring though.
> > 
> > I don't see system freezing with my i.MX93 11x11 EVK when probing
> > lcdif. I did try to boot the system several times. All look ok. This is
> > a snippet of dmesg when lcdif probes:
> > 
> > --8<--
> > [0.753083] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
> > [0.761669] SuperH (H)SCI(F) driver initialized
> > [0.766523] msm_serial: driver initialized
> > [0.780523] 44380000.serial: ttyLP0 at MMIO 0x44380010 (irq = 16, base_baud = 1500000) is a FSL_LPUART
> > [0.780523] printk: console [ttyLP0] enabled
> > [0.788928] printk: bootconsole [lpuart32] disabled
> > [0.788928] printk: bootconsole [lpuart32] disabled
> > [0.804632] panel-simple lvds_panel: supply power not found, using
> > dummy regulator
> > [0.814741] [drm] Initialized imx-lcdif 1.0.0 20220417 for 4ae30000.lcd-controller on minor 0
> > [1.195930] Console: switching to colour frame buffer device 160x50
> > [1.218385] imx-lcdif 4ae30000.lcd-controller: [drm] fb0: imx-lcdifdrmfb frame buffer device
> > [1.227099] cacheinfo: Unable to detect cache hierarchy for CPU 0
> > [1.236725] loop: module loaded
> > --8<--
> > 
> > ~300 milliseconds are consumed by the enablement delay required by the
> > "boe,ev121wxm-n10-1850" LVDS panel I use.
> 
> It seems you have the drivers compiled in. I use modules in my case and
> simple-panel as well. But this is unrelated, because lcdif_probe() is yet to
> be called. Using the debug diff from below I get the following output:
> 
> [   16.97] imx93-blk-ctrl 4ac10000.system-controller: imx93_blk_ctrl_power_on: 1
> [   16.122491] imx93-blk-ctrl 4ac10000.system-controller: imx93_blk_ctrl_power_on: 2
> [   16.137766] imx93-blk-ctrl 4ac10000.system-controller: imx93_blk_ctrl_power_on: 3
> [   16.154905] imx93-blk-ctrl 4ac10000.system-controller: imx93_blk_ctrl_power_on: 4
> 
> It seems setting BLK_CLK_EN blocks the whole system; even reading is not
> possible. I don't have any details on the hardware, but it seems that either
> some clock or power domain is not enabled. This can also happen if I'm
> loading the lcdif module manually after boot. But I can't detect any
> differences in /sys/kernel/debug/clk/clk_summary.

I think I found the cause. It's the maximum clock frequency for media_axi and 
media_apb. These clocks were not explicitly configured, most probably 
exceeding the maximum frequency allowed.

Best regards,
Alexander

> ---8<---
> diff --git a/drivers/soc/imx/imx93-blk-ctrl.c b/drivers/soc/imx/imx93-blk-ctrl.c
> index 2c600329436cf..50aeb20ce90dc 100644
> --- a/drivers/soc/imx/imx93-blk-ctrl.c
> +++ b/drivers/soc/imx/imx93-blk-ctrl.c
> @@ -129,12 +129,14 @@ static int imx93_blk_ctrl_power_on(struct generic_pm_domain *genpd)
>   struct imx93_blk_ctrl *bc = domain->bc;
>   int ret;
> 
> + dev_info(bc->dev, "%s: 1\n", __func__);
>   ret = clk_bulk_prepare_enable(bc->num_clks, bc->clks);
>   if (ret) {
>   
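
If the cause really is those rates, a hedged sketch of pinning them
from the board DT; the media_blk_ctrl/clk labels and the 400 MHz /
133.33 MHz values are assumptions borrowed from imx93.dtsi-style boards
and need checking against the i.MX93 datasheet:

&media_blk_ctrl {
	/* Keep the media bus clocks at or below their maxima so powering
	 * up IMX93_MEDIABLK_PD_LCDIF cannot wedge the interconnect. */
	assigned-clocks = <&clk IMX93_CLK_MEDIA_AXI>,
			  <&clk IMX93_CLK_MEDIA_APB>;
	assigned-clock-rates = <400000000>, <133333333>;
};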

Re: [PATCH v2 06/14] drm/msm/gpu: Use dev_pm_opp_set_rate for non-GMU GPUs

2023-02-20 Thread Konrad Dybcio



On 20.02.2023 10:59, Konrad Dybcio wrote:
> 
> 
> On 18.02.2023 17:47, Dmitry Baryshkov wrote:
>> On 18/02/2023 13:04, Konrad Dybcio wrote:
>>>
>>>
>>> On 17.02.2023 22:07, Dmitry Baryshkov wrote:
 On 14/02/2023 19:31, Konrad Dybcio wrote:
> Currently we only utilize the OPP table connected to the GPU for
> getting (available) frequencies. We do however need to scale the
> voltage rail(s) accordingly to ensure that we aren't trying to
> run the GPU at 1GHz with a VDD_LOW vote, as that would result in
> an otherwise inexplainable hang.
>
> Tell the OPP framework that we want to scale the "core" clock
> and swap out the clk_set_rate to a dev_pm_opp_set_rate in
> msm_devfreq_target() to enable usage of required-opps and by
> extension proper voltage level/corner scaling.
>
> Signed-off-by: Konrad Dybcio 
> ---
>    drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 ++++
>    drivers/gpu/drm/msm/msm_gpu_devfreq.c   | 2 +-
>    2 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index ce6b76c45b6f..15e405e4f977 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -1047,6 +1047,10 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
>    const char *gpu_name;
>    u32 speedbin;
> +	/* This can only be done here, or devm_pm_opp_set_supported_hw will WARN_ON() */
> +	if (!IS_ERR(devm_clk_get(dev, "core")))
> +		devm_pm_opp_set_clkname(dev, "core");

 Can we instead move a call to a6xx_set_supported_hw() / check_speed_bin 
 after the adreno_gpu_init() ? It will call msm_gpu_init, which in turn 
 sets gpu->core_clk.

 Ideally you can call devm_pm_opp_set_clkname() from that function.
>>>
>>>
 Or maybe completely drop gpu->core_clk and always use 
 devm_pm_opp_set_clk_rate().
>>> That would break non-OPP targets, last of which were probably added N=big 
>>> years ago..
>>
>> No. In the lack of OPP tables, dev_pm_opp_clk_set_rate() should behave 
>> exactly like the clk_set_rate().
> Not sure if that's what you meant, but if a device lacks OPP,
> devm_pm_opp_set_rate will return -ENODEV.
> 
> If you meant "if we can't find an opp table, behave as if we
> called clk_set_rate", a discussion on #freedreno with robclark
> indicates he'd accept getting rid of non-opp code, provided we
> construct a table if need be, since we have the data required
> to do so ([FMIN=27MHz, FMAX=fast_rate]).
Actually.. that's what happens for gpu-pwrlevels users already..
Well, use>r<, as apq8064 seems to have been the only user of
that upstream, ever..

And for A2XX it looks like it just unconditionally selects 200
MHz..

I think this could be simplified to:

if (opp exists)
// use opp
else if (adreno_is_a2xx)
dev_pm_opp_add(dev, 200000000, 0) //device, freq_hz, volt_uV
else if (adreno_is_a320)
dev_pm_opp_add(dev, 450000000, 0)
else
// for now the driver sets 200mhz here, but i don't think
// it's reasonable to keep carrying that behavior for >a2xx
return -EINVAL
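
Fleshed out, a sketch of that fallback (the function name is made up; the
adreno_is_*() helpers exist in the driver, and the frequencies are the ones
discussed above):

	/* Sketch: synthesize a one-entry OPP table for legacy targets
	 * that have none in DT. */
	static int adreno_opp_fallback(struct device *dev,
				       struct adreno_gpu *adreno_gpu)
	{
		if (dev_pm_opp_get_opp_count(dev) > 0)
			return 0; /* a real OPP table exists, nothing to do */

		if (adreno_is_a2xx(adreno_gpu))
			return dev_pm_opp_add(dev, 200000000, 0);

		if (adreno_is_a320(adreno_gpu))
			return dev_pm_opp_add(dev, 450000000, 0);

		return -EINVAL;
	}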


And then we can yank out all clk_set_rate calls just like that!

Konrad
> 
>>
>>> I'm not sure these would still work, as I think we've got rid of some ugly
>>> clock getters that were looking for both "core" and "core_clk" etc.
>>
>> We still support core vs core_clk, see the get_clocks() at msm_gpu.c and 
>> then msm_clk_bulk_get_clock(). However we might mimic this function and 
>> call devm_pm_opp_set_clkname() with the proper name ("core" or "core_clk").
>>
>>>
>>> See 8db0b6c7b636376789e356d861c3c6c35dcb6913 for what seems to be the most 
>>> recent
>>> example of non-OPP.
>>>
>>> IMX51/53 also have no OPP tables and are using the (AFAIK) now-defunct 
>>> _clk-suffixed
>>> clock-names.
>>
>> It works, I tested it during this cycle.
> Oh okay, I had a feeling like that was dropped at one point..
> 
>>
>>>
>>> I'd be more than happy to rip out some of this legacy code and convert it
>>> to something modern like OPP, but I'm not sure you guys would like it 
>>> considering
>>> the breakage on (arguably ancient and borderline retired) platforms.
>>
>> I think, we should try switching to OPP-for-everybody, granted the promise 
>> of dev_pm_opp_set_clk_rate() being backwards compatible with bare 
>> clk_set_rate().
> It's not, but as I mentioned, we can easily work around that.
> 
>>
>>>
>>> This patch as-is "only" breaks non-OPP a5xx & a6xx (as they have .gpu_busy 
>>> defined),
>>> of which there are none..
> ...but we want to get devfreq everywhere and it's a few LoC away..
> 
> Konrad
>>>

> +
>    adreno_gpu->funcs = funcs;
>    adreno_gpu->info = adreno_info(config->rev);
>    adreno_gpu->gmem = adreno_gpu->info->gmem;
> diff --git 

Re: [PATCH] drm/gem: Expose the buffer object handle to userspace last

2023-02-20 Thread Christian König

On 20.02.23 10:55, Tvrtko Ursulin wrote:


Hi,

On 14/02/2023 13:59, Christian König wrote:

On 14.02.23 13:50, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Currently drm_gem_handle_create_tail exposes the handle to userspace
before the buffer object construction is complete. This allows working
against a partially constructed object, which may also be in the process
of having its creation fail, and that can have a range of negative
outcomes.

A lot of those will depend on what the individual drivers are doing in
their obj->funcs->open() callbacks, and also with a common failure mode
being -ENOMEM from drm_vma_node_allow.

We can make sure none of this can happen by allocating a handle last,
although with a downside that more of the function now runs under the
dev->object_name_lock.

Looking into the individual drivers' open() hooks, we have
amdgpu_gem_object_open which seems like it could have a potential
security issue without this change.

A couple drivers like qxl_gem_object_open and vmw_gem_object_open
implement no-op hooks so no impact for them.

A bunch of others require a deeper look by individual owners to assess
the impact. Those are lima_gem_object_open, nouveau_gem_object_open,
panfrost_gem_open, radeon_gem_object_open and 
virtio_gpu_gem_object_open.


Putting aside the risk assessment of the above, some common scenarios to
think about are along these lines:

1)
Userspace closes a handle by speculatively "guessing" it from a second
thread.

This results in an unreachable buffer object, so a memory leak.

2)
Same as 1), but object is in the process of getting closed (failed
creation).

The second thread is then able to re-cycle the handle, and idr_remove in
the first thread would then remove a handle it does not own from the
idr.

3)
Going back to the earlier per driver problem space - individual impact
assessment of allowing a second thread to access and operate on a
partially constructed handle / object. (Can something crash? Leak
information?)

In terms of identifying when the problem started I will tag some patches
as references, but not all, if even any, of them actually point to a
broken state. I am just identifying points at which more opportunity for
issues to arise was added.


Yes I've looked into this once as well, but couldn't completely solve 
it for some reason.


Give me a day or two to get this tested and all the logic swapped 
back into my head again.


Managed to recollect what the problem with earlier attempts was?


Nope, that's way too long ago. I can only assume that I ran into problems 
with the object_name_lock.


Probably best to double check if that doesn't result in a lock inversion 
when somebody grabs the reservation lock in their ->load() callback.


Regards,
Christian.



Regards,

Tvrtko


Christian.



References: 304eda32920b ("drm/gem: add hooks to notify driver when 
object handle is created/destroyed")

References: ca481c9b2a3a ("drm/gem: implement vma access management")
References: b39b5394fabc ("drm/gem: Add drm_gem_object_funcs")
Cc: dri-devel@lists.freedesktop.org
Cc: Rob Clark 
Cc: Ben Skeggs 
Cc: David Herrmann 
Cc: Noralf Trønnes 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: amd-...@lists.freedesktop.org
Cc: l...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
Cc: Steven Price 
Cc: virtualizat...@lists.linux-foundation.org
Cc: spice-de...@lists.freedesktop.org
Cc: Zack Rusin 
---
  drivers/gpu/drm/drm_gem.c | 48 
+++

  1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index aa15c52ae182..e3d897bca0f2 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -356,52 +356,52 @@ drm_gem_handle_create_tail(struct drm_file 
*file_priv,

 u32 *handlep)
  {
  struct drm_device *dev = obj->dev;
-    u32 handle;
  int ret;
WARN_ON(!mutex_is_locked(&dev->object_name_lock));
  if (obj->handle_count++ == 0)
  drm_gem_object_get(obj);
+    ret = drm_vma_node_allow(&obj->vma_node, file_priv);
+    if (ret)
+    goto err_put;
+
+    if (obj->funcs->open) {
+    ret = obj->funcs->open(obj, file_priv);
+    if (ret)
+    goto err_revoke;
+    }
+
  /*
- * Get the user-visible handle using idr.  Preload and perform
- * allocation under our spinlock.
+ * Get the user-visible handle using idr as the _last_ step.
+ * Preload and perform allocation under our spinlock.
   */
  idr_preload(GFP_KERNEL);
   spin_lock(&file_priv->table_lock);
-
   ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
-
   spin_unlock(&file_priv->table_lock);
   idr_preload_end();
-    mutex_unlock(&dev->object_name_lock);
  if (ret < 0)
-    goto err_unref;
-
-    handle = ret;
+    goto err_close;
-    ret = drm_vma_node_allow(&obj->vma_node, file_priv);
-    if (ret)
-    goto err_remove;
+    mutex_unlock(&dev->object_name_lock);
-    if (obj->funcs->open) {
-   

Re: [PATCH v2 06/14] drm/msm/gpu: Use dev_pm_opp_set_rate for non-GMU GPUs

2023-02-20 Thread Konrad Dybcio



On 18.02.2023 17:47, Dmitry Baryshkov wrote:
> On 18/02/2023 13:04, Konrad Dybcio wrote:
>>
>>
>> On 17.02.2023 22:07, Dmitry Baryshkov wrote:
>>> On 14/02/2023 19:31, Konrad Dybcio wrote:
 Currently we only utilize the OPP table connected to the GPU for
 getting (available) frequencies. We do however need to scale the
 voltage rail(s) accordingly to ensure that we aren't trying to
 run the GPU at 1GHz with a VDD_LOW vote, as that would result in
 an otherwise inexplicable hang.

 Tell the OPP framework that we want to scale the "core" clock
 and swap out the clk_set_rate to a dev_pm_opp_set_rate in
 msm_devfreq_target() to enable usage of required-opps and by
 extension proper voltage level/corner scaling.

 Signed-off-by: Konrad Dybcio 
 ---
    drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 
    drivers/gpu/drm/msm/msm_gpu_devfreq.c   | 2 +-
    2 files changed, 5 insertions(+), 1 deletion(-)

 diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
 b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
 index ce6b76c45b6f..15e405e4f977 100644
 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
 +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
 @@ -1047,6 +1047,10 @@ int adreno_gpu_init(struct drm_device *drm, struct 
 platform_device *pdev,
    const char *gpu_name;
    u32 speedbin;
    +    /* This can only be done here, or devm_pm_opp_set_supported_hw 
 will WARN_ON() */
 +    if (!IS_ERR(devm_clk_get(dev, "core")))
 +    devm_pm_opp_set_clkname(dev, "core");
>>>
>>> Can we instead move a call to a6xx_set_supported_hw() / check_speed_bin 
>>> after the adreno_gpu_init() ? It will call msm_gpu_init, which in turn sets 
>>> gpu->core_clk.
>>>
>>> Ideally you can call devm_pm_opp_set_clkname() from that function.
>>
>>
>>> Or maybe completely drop gpu->core_clk and always use 
>>> devm_pm_opp_set_clk_rate().
>> That would break non-OPP targets, last of which were probably added N=big 
>> years ago..
> 
> No. In the lack of OPP tables, dev_pm_opp_clk_set_rate() should behave 
> exactly like the clk_set_rate().
Not sure if that's what you meant, but if a device lacks OPP,
devm_pm_opp_set_rate will return -ENODEV.

If you meant "if we can't find an opp table, behave as if we
called clk_set_rate", a discussion on #freedreno with robclark
indicates he'd accept getting rid of non-opp code, provided we
construct a table if need be, since we have the data required
to do so ([FMIN=27MHz, FMAX=fast_rate]).

> 
>> I'm not sure these would still work, as I think we've got rid of some ugly
>> clock getters that were looking for both "core" and "core_clk" etc.
> 
> We still support core vs core_clk, see the get_clocks() at msm_gpu.c and then 
> msm_clk_bulk_get_clock(). However we might mimic this function and call 
> devm_pm_opp_set_clkname() with the proper name ("core" or "core_clk").
> 
>>
>> See 8db0b6c7b636376789e356d861c3c6c35dcb6913 for what seems to be the most 
>> recent
>> example of non-OPP.
>>
>> IMX51/53 also have no OPP tables and are using the (AFAIK) now-defunct 
>> _clk-suffixed
>> clock-names.
> 
> It works, I tested it during this cycle.
Oh okay, I had a feeling like that was dropped at one point..

> 
>>
>> I'd be more than happy to rip out some of this legacy code and convert it
>> to something modern like OPP, but I'm not sure you guys would like it 
>> considering
>> the breakage on (arguably ancient and borderline retired) platforms.
> 
> I think, we should try switching to OPP-for-everybody, granted the promise of 
> dev_pm_opp_set_clk_rate() being backwards compatible with bare clk_set_rate().
It's not, but as I mentioned, we can easily work around that.

> 
>>
>> This patch as-is "only" breaks non-OPP a5xx & a6xx (as they have .gpu_busy 
>> defined),
>> of which there are none..
...but we want to get devfreq everywhere and it's a few LoC away..

Konrad
>>
>>>
 +
    adreno_gpu->funcs = funcs;
    adreno_gpu->info = adreno_info(config->rev);
    adreno_gpu->gmem = adreno_gpu->info->gmem;
 diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c 
 b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
 index e27dbf12b5e8..ea70c1c32d94 100644
 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
 +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
 @@ -48,7 +48,7 @@ static int msm_devfreq_target(struct device *dev, 
 unsigned long *freq,
    gpu->funcs->gpu_set_freq(gpu, opp, df->suspended);
    mutex_unlock(&df->lock);
    } else {
 -    clk_set_rate(gpu->core_clk, *freq);
 +    dev_pm_opp_set_rate(dev, *freq);
>>>
>>> This is not enough, there are calls to clk_set_rate(gpu->core_clk) in 
>>> msm_gpu.c which are called from the suspend/resume path.
>> Right, good catch.
>>
>> Konrad
>>>
    }
      dev_pm_opp_put(opp);
>>>
> 


Re: [PATCH] drm/gem: Expose the buffer object handle to userspace last

2023-02-20 Thread Tvrtko Ursulin



Hi,

On 14/02/2023 13:59, Christian König wrote:

On 14.02.23 13:50, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Currently drm_gem_handle_create_tail exposes the handle to userspace
before the buffer object construction is complete. This allows working
against a partially constructed object, which may also be in the process
of having its creation fail, and that can have a range of negative
outcomes.

A lot of those will depend on what the individual drivers are doing in
their obj->funcs->open() callbacks, and also with a common failure mode
being -ENOMEM from drm_vma_node_allow.

We can make sure none of this can happen by allocating a handle last,
although with a downside that more of the function now runs under the
dev->object_name_lock.

Looking into the individual drivers' open() hooks, we have
amdgpu_gem_object_open which seems like it could have a potential
security issue without this change.

A couple drivers like qxl_gem_object_open and vmw_gem_object_open
implement no-op hooks so no impact for them.

A bunch of others require a deeper look by individual owners to assess the
impact. Those are lima_gem_object_open, nouveau_gem_object_open,
panfrost_gem_open, radeon_gem_object_open and virtio_gpu_gem_object_open.

Putting aside the risk assessment of the above, some common scenarios to
think about are along these lines:

1)
Userspace closes a handle by speculatively "guessing" it from a second
thread.

This results in an unreachable buffer object, so a memory leak.

2)
Same as 1), but object is in the process of getting closed (failed
creation).

The second thread is then able to re-cycle the handle, and idr_remove in
the first thread would then remove a handle it does not own from the
idr.

3)
Going back to the earlier per driver problem space - individual impact
assessment of allowing a second thread to access and operate on a
partially constructed handle / object. (Can something crash? Leak
information?)
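
To make scenario 1) concrete, a minimal userspace sketch (illustrative only,
not taken from the patch):

	#include <pthread.h>
	#include <sys/ioctl.h>
	#include <drm/drm.h>

	/* Thread B guesses the handle that thread A's creation ioctl is
	 * about to return and closes it first; the object then becomes
	 * unreachable and leaks. */
	static void *close_guessed_handle(void *arg)
	{
		int drm_fd = *(int *)arg;
		struct drm_gem_close args = { .handle = 1 /* guessed */ };

		ioctl(drm_fd, DRM_IOCTL_GEM_CLOSE, &args);
		return NULL;
	}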

In terms of identifying when the problem started I will tag some patches
as references, but not all, if even any, of them actually point to a
broken state. I am just identifying points at which more opportunity for
issues to arise was added.


Yes I've looked into this once as well, but couldn't completely solve it 
for some reason.


Give me a day or two to get this tested and all the logic swapped back 
into my head again.


Managed to recollect what the problem with earlier attempts was?

Regards,

Tvrtko


Christian.



References: 304eda32920b ("drm/gem: add hooks to notify driver when 
object handle is created/destroyed")

References: ca481c9b2a3a ("drm/gem: implement vma access management")
References: b39b5394fabc ("drm/gem: Add drm_gem_object_funcs")
Cc: dri-devel@lists.freedesktop.org
Cc: Rob Clark 
Cc: Ben Skeggs 
Cc: David Herrmann 
Cc: Noralf Trønnes 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: amd-...@lists.freedesktop.org
Cc: l...@lists.freedesktop.org
Cc: nouv...@lists.freedesktop.org
Cc: Steven Price 
Cc: virtualizat...@lists.linux-foundation.org
Cc: spice-de...@lists.freedesktop.org
Cc: Zack Rusin 
---
  drivers/gpu/drm/drm_gem.c | 48 +++
  1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index aa15c52ae182..e3d897bca0f2 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -356,52 +356,52 @@ drm_gem_handle_create_tail(struct drm_file 
*file_priv,

 u32 *handlep)
  {
  struct drm_device *dev = obj->dev;
-    u32 handle;
  int ret;
  WARN_ON(!mutex_is_locked(&dev->object_name_lock));
  if (obj->handle_count++ == 0)
  drm_gem_object_get(obj);
+    ret = drm_vma_node_allow(&obj->vma_node, file_priv);
+    if (ret)
+    goto err_put;
+
+    if (obj->funcs->open) {
+    ret = obj->funcs->open(obj, file_priv);
+    if (ret)
+    goto err_revoke;
+    }
+
  /*
- * Get the user-visible handle using idr.  Preload and perform
- * allocation under our spinlock.
+ * Get the user-visible handle using idr as the _last_ step.
+ * Preload and perform allocation under our spinlock.
   */
  idr_preload(GFP_KERNEL);
   spin_lock(&file_priv->table_lock);
-
   ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
-
   spin_unlock(&file_priv->table_lock);
   idr_preload_end();
-    mutex_unlock(&dev->object_name_lock);
  if (ret < 0)
-    goto err_unref;
-
-    handle = ret;
+    goto err_close;
-    ret = drm_vma_node_allow(&obj->vma_node, file_priv);
-    if (ret)
-    goto err_remove;
+    mutex_unlock(&dev->object_name_lock);
-    if (obj->funcs->open) {
-    ret = obj->funcs->open(obj, file_priv);
-    if (ret)
-    goto err_revoke;
-    }
+    *handlep = ret;
-    *handlep = handle;
  return 0;
+err_close:
+    if (obj->funcs->close)
+    obj->funcs->close(obj, file_priv);
  err_revoke:
  drm_vma_node_revoke(&obj->vma_node, file_priv);
-err_remove:
-    

[PATCH 3/4] habanalabs: change hw_fini to return int to indicate error

2023-02-20 Thread Oded Gabbay
From: Dafna Hirschfeld 

We later use a cpucp packet for soft reset, which might fail, so we
should be able to propagate the failure case.

Signed-off-by: Dafna Hirschfeld 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/accel/habanalabs/common/habanalabs.h | 2 +-
 drivers/accel/habanalabs/gaudi/gaudi.c   | 5 +++--
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 5 +++--
 drivers/accel/habanalabs/goya/goya.c | 5 +++--
 4 files changed, 10 insertions(+), 7 deletions(-)
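
For illustration, what a reset path can now do with the result (a sketch, not
taken from this series):

	rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
	if (rc) {
		dev_err(hdev->dev, "hw_fini failed, rc=%d\n", rc);
		return rc;
	}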

diff --git a/drivers/accel/habanalabs/common/habanalabs.h 
b/drivers/accel/habanalabs/common/habanalabs.h
index de4ff525cbcb..597c7f1037d1 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1576,7 +1576,7 @@ struct hl_asic_funcs {
int (*sw_init)(struct hl_device *hdev);
int (*sw_fini)(struct hl_device *hdev);
int (*hw_init)(struct hl_device *hdev);
-   void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
+   int (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool 
fw_reset);
int (*suspend)(struct hl_device *hdev);
int (*resume)(struct hl_device *hdev);
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c 
b/drivers/accel/habanalabs/gaudi/gaudi.c
index a276a2a4a46d..26287084a9e0 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -4069,7 +4069,7 @@ static int gaudi_hw_init(struct hl_device *hdev)
return rc;
 }
 
-static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool 
fw_reset)
+static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool 
fw_reset)
 {
struct cpu_dyn_regs *dyn_regs =
		&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
@@ -4079,7 +4079,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool 
hard_reset, bool fw_reset
 
if (!hard_reset) {
dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
-   return;
+   return 0;
}
 
if (hdev->pldm) {
@@ -4216,6 +4216,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool 
hard_reset, bool fw_reset
 
hdev->device_cpu_is_halted = false;
}
+   return 0;
 }
 
 static int gaudi_suspend(struct hl_device *hdev)
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c 
b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 2f51a121909b..5a225f23961b 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -5885,7 +5885,7 @@ static void gaudi2_get_soft_rst_done_indication(struct 
hl_device *hdev, u32 poll
reg_val);
 }
 
-static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool 
fw_reset)
+static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool 
fw_reset)
 {
struct gaudi2_device *gaudi2 = hdev->asic_specific;
u32 poll_timeout_us, reset_sleep_ms;
@@ -5951,7 +5951,7 @@ static void gaudi2_hw_fini(struct hl_device *hdev, bool 
hard_reset, bool fw_rese
gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
 
if (!gaudi2)
-   return;
+   return 0;
 
gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
@@ -5978,6 +5978,7 @@ static void gaudi2_hw_fini(struct hl_device *hdev, bool 
hard_reset, bool fw_rese
HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
HW_CAP_ROT_MASK);
}
+   return 0;
 }
 
 static int gaudi2_suspend(struct hl_device *hdev)
diff --git a/drivers/accel/habanalabs/goya/goya.c 
b/drivers/accel/habanalabs/goya/goya.c
index c5a22a8e0957..7a45ab3ca43a 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -2783,7 +2783,7 @@ static int goya_hw_init(struct hl_device *hdev)
return rc;
 }
 
-static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool 
fw_reset)
+static int goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
struct goya_device *goya = hdev->asic_specific;
u32 reset_timeout_ms, cpu_timeout_ms, status;
@@ -2839,7 +2839,7 @@ static void goya_hw_fini(struct hl_device *hdev, bool 
hard_reset, bool fw_reset)
HW_CAP_GOLDEN | HW_CAP_TPC);
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_SOFT_RESET);
-   return;
+   return 0;
}
 
/* Chicken bit to re-initiate boot sequencer flow */
@@ -2858,6 +2858,7 @@ static void goya_hw_fini(struct hl_device *hdev, bool 
hard_reset, bool fw_reset)
 
memset(goya->events_stat, 0, sizeof(goya->events_stat));
}
+   return 0;
 }
 
 int 

[PATCH 4/4] habanalabs/gaudi2: remove unneeded irq_handler variable

2023-02-20 Thread Oded Gabbay
From: Tomer Tayar 

'irq_handler' in gaudi2_enable_msix() is just assigned a function
name and then used when calling request_threaded_irq().
Remove the variable and use the function name directly as an argument.

Signed-off-by: Tomer Tayar 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c 
b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 5a225f23961b..2021ef9d4702 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -3974,7 +3974,6 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
	struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi2_device *gaudi2 = hdev->asic_specific;
int rc, irq, i, j, user_irq_init_cnt;
-   irq_handler_t irq_handler;
struct hl_cq *cq;
 
if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
@@ -4024,10 +4023,9 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
i++, j++, user_irq_init_cnt++) {
 
irq = pci_irq_vector(hdev->pdev, i);
-   irq_handler = hl_irq_handler_user_interrupt;
-
-   rc = request_threaded_irq(irq, irq_handler, 
hl_irq_user_interrupt_thread_handler,
-   IRQF_ONESHOT, gaudi2_irq_name(i), 
&hdev->user_interrupt[j]);
+   rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
+   
hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
+   gaudi2_irq_name(i), 
&hdev->user_interrupt[j]);
 
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
-- 
2.25.1



[PATCH 2/4] habanalabs: improve readability of engines idle mask print

2023-02-20 Thread Oded Gabbay
From: Tomer Tayar 

Remove leading zeroes when printing the idle mask to make it clearer.

Signed-off-by: Tomer Tayar 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/accel/habanalabs/common/device.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)
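
For illustration (made-up mask value, not from a real log), a mask with only
the low word set changes from roughly:

	(mask 0_0_0_0000000000100040)

to:

	(mask 0x100040)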

diff --git a/drivers/accel/habanalabs/common/device.c 
b/drivers/accel/habanalabs/common/device.c
index a5f5ee102823..e544d00fe376 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -380,18 +380,17 @@ bool hl_ctrl_device_operational(struct hl_device *hdev,
 static void print_idle_status_mask(struct hl_device *hdev, const char *message,
u64 
idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE])
 {
-   u32 pad_width[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {};
-
-   BUILD_BUG_ON(HL_BUSY_ENGINES_MASK_EXT_SIZE != 4);
-
-   pad_width[3] = idle_mask[3] ? 16 : 0;
-   pad_width[2] = idle_mask[2] || pad_width[3] ? 16 : 0;
-   pad_width[1] = idle_mask[1] || pad_width[2] ? 16 : 0;
-   pad_width[0] = idle_mask[0] || pad_width[1] ? 16 : 0;
-
-   dev_err(hdev->dev, "%s (mask %0*llx_%0*llx_%0*llx_%0*llx)\n",
-   message, pad_width[3], idle_mask[3], pad_width[2], idle_mask[2],
-   pad_width[1], idle_mask[1], pad_width[0], idle_mask[0]);
+   if (idle_mask[3])
+   dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx_%016llx)\n",
+   message, idle_mask[3], idle_mask[2], idle_mask[1], 
idle_mask[0]);
+   else if (idle_mask[2])
+   dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx)\n",
+   message, idle_mask[2], idle_mask[1], idle_mask[0]);
+   else if (idle_mask[1])
+   dev_err(hdev->dev, "%s (mask %#llx_%016llx)\n",
+   message, idle_mask[1], idle_mask[0]);
+   else
+   dev_err(hdev->dev, "%s (mask %#llx)\n", message, idle_mask[0]);
 }
 
 static void hpriv_release(struct kref *ref)
-- 
2.25.1



[PATCH 1/4] habanalabs: organize hl_device structure comment

2023-02-20 Thread Oded Gabbay
From: Sagiv Ozeri 

Make the comments align with the order of the fields in the structure.

Signed-off-by: Sagiv Ozeri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/accel/habanalabs/common/habanalabs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/accel/habanalabs/common/habanalabs.h 
b/drivers/accel/habanalabs/common/habanalabs.h
index 7b6b4ff20a3b..de4ff525cbcb 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3296,6 +3296,8 @@ struct hl_reset_info {
  * @supports_mmu_prefetch: true if prefetch is supported, otherwise false.
  * @reset_upon_device_release: reset the device when the user closes the file 
descriptor of the
  * device.
+ * @supports_ctx_switch: true if a ctx switch is required upon first 
submission.
+ * @support_preboot_binning: true if we support read binning info from preboot.
  * @nic_ports_mask: Controls which NIC ports are enabled. Used only for 
testing.
  * @fw_components: Controls which f/w components to load to the device. There 
are multiple f/w
  * stages and sometimes we want to stop at a certain stage. 
Used only for testing.
@@ -3309,8 +3311,6 @@ struct hl_reset_info {
  * Used only for testing.
  * @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the 
f/w, which verifies
  * that the f/w is always alive. Used only for testing.
- * @supports_ctx_switch: true if a ctx switch is required upon first 
submission.
- * @support_preboot_binning: true if we support read binning info from preboot.
  */
 struct hl_device {
struct pci_dev  *pdev;
@@ -3457,7 +3457,7 @@ struct hl_device {
u8  supports_ctx_switch;
u8  support_preboot_binning;
 
-   /* Parameters for bring-up */
+   /* Parameters for bring-up to be upstreamed */
u64 nic_ports_mask;
u64 fw_components;
u8  mmu_enable;
-- 
2.25.1



Re: [PATCH v4 10/14] drm/vblank: Add helper to get next vblank time

2023-02-20 Thread Pekka Paalanen
On Sat, 18 Feb 2023 13:15:53 -0800
Rob Clark  wrote:

> From: Rob Clark 
> 
> Will be used in the next commit to set a deadline on fences that an
> atomic update is waiting on.
> 
> Signed-off-by: Rob Clark 
> ---
>  drivers/gpu/drm/drm_vblank.c | 32 
>  include/drm/drm_vblank.h |  1 +
>  2 files changed, 33 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
> index 2ff31717a3de..caf25ebb34c5 100644
> --- a/drivers/gpu/drm/drm_vblank.c
> +++ b/drivers/gpu/drm/drm_vblank.c
> @@ -980,6 +980,38 @@ u64 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc,
>  }
>  EXPORT_SYMBOL(drm_crtc_vblank_count_and_time);
>  
> +/**
> + * drm_crtc_next_vblank_time - calculate the time of the next vblank
> + * @crtc: the crtc for which to calculate next vblank time
> + * @vblanktime: pointer to time to receive the next vblank timestamp.
> + *
> + * Calculate the expected time of the next vblank based on time of previous
> + * vblank and frame duration

Hi,

for VRR this targets the highest frame rate possible for the current
VRR mode, right?


Thanks,
pq

> + */
> +int drm_crtc_next_vblank_time(struct drm_crtc *crtc, ktime_t *vblanktime)
> +{
> + unsigned int pipe = drm_crtc_index(crtc);
> + struct drm_vblank_crtc *vblank = &crtc->dev->vblank[pipe];
> + u64 count;
> +
> + if (!vblank->framedur_ns)
> + return -EINVAL;
> +
> + count = drm_vblank_count_and_time(crtc->dev, pipe, vblanktime);
> +
> + /*
> +  * If we don't get a valid count, then we probably also don't
> +  * have a valid time:
> +  */
> + if (!count)
> + return -EINVAL;
> +
> + *vblanktime = ktime_add(*vblanktime, ns_to_ktime(vblank->framedur_ns));
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(drm_crtc_next_vblank_time);
> +
>  static void send_vblank_event(struct drm_device *dev,
>   struct drm_pending_vblank_event *e,
>   u64 seq, ktime_t now)
> diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h
> index 733a3e2d1d10..a63bc2c92f3c 100644
> --- a/include/drm/drm_vblank.h
> +++ b/include/drm/drm_vblank.h
> @@ -230,6 +230,7 @@ bool drm_dev_has_vblank(const struct drm_device *dev);
>  u64 drm_crtc_vblank_count(struct drm_crtc *crtc);
>  u64 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc,
>  ktime_t *vblanktime);
> +int drm_crtc_next_vblank_time(struct drm_crtc *crtc, ktime_t *vblanktime);
>  void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
>  struct drm_pending_vblank_event *e);
>  void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
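
For context, the intended use per the commit message is along these lines (a
sketch, not code from the series; "fence" stands for whatever fence the
atomic update waits on):

	ktime_t vbltime;

	if (drm_crtc_next_vblank_time(crtc, &vbltime) == 0)
		dma_fence_set_deadline(fence, vbltime);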





Re: [PATCH v4 09/14] drm/syncobj: Add deadline support for syncobj waits

2023-02-20 Thread Pekka Paalanen
On Sat, 18 Feb 2023 13:15:52 -0800
Rob Clark  wrote:

> From: Rob Clark 
> 
> Add a new flag to let userspace provide a deadline as a hint for syncobj
> and timeline waits.  This gives a hint to the driver signaling the
> backing fences about how soon userspace needs it to complete work, so it
> can adjust GPU frequency accordingly.  An immediate deadline can be
> given to provide something equivalent to i915 "wait boost".
> 
> Signed-off-by: Rob Clark 
> ---
> 
> I'm a bit on the fence about the addition of the DRM_CAP, but it seems
> useful to give userspace a way to probe whether the kernel and driver
> supports the new wait flag, especially since we have vk-common code
> dealing with syncobjs.  But open to suggestions.
> 
>  drivers/gpu/drm/drm_ioctl.c   |  3 ++
>  drivers/gpu/drm/drm_syncobj.c | 59 ---
>  include/drm/drm_drv.h |  6 
>  include/uapi/drm/drm.h| 16 --
>  4 files changed, 71 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index 7c9d66ee917d..1c5c942cf0f9 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -254,6 +254,9 @@ static int drm_getcap(struct drm_device *dev, void *data, 
> struct drm_file *file_
>   case DRM_CAP_SYNCOBJ_TIMELINE:
>   req->value = drm_core_check_feature(dev, 
> DRIVER_SYNCOBJ_TIMELINE);
>   return 0;
> + case DRM_CAP_SYNCOBJ_DEADLINE:
> + req->value = drm_core_check_feature(dev, 
> DRIVER_SYNCOBJ_TIMELINE);

Hi,

is that a typo for DRIVER_SYNCOBJ_DEADLINE?

> + return 0;
>   }
>  
>   /* Other caps only work with KMS drivers */
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 0c2be8360525..61cf97972a60 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -973,7 +973,8 @@ static signed long drm_syncobj_array_wait_timeout(struct 
> drm_syncobj **syncobjs,
> uint32_t count,
> uint32_t flags,
> signed long timeout,
> -   uint32_t *idx)
> +   uint32_t *idx,
> +   ktime_t *deadline)
>  {
>   struct syncobj_wait_entry *entries;
>   struct dma_fence *fence;
> @@ -1053,6 +1054,15 @@ static signed long 
> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
>   drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
>   }
>  
> + if (deadline) {
> + for (i = 0; i < count; ++i) {
> + fence = entries[i].fence;
> + if (!fence)
> + continue;
> + dma_fence_set_deadline(fence, *deadline);
> + }
> + }
> +
>   do {
>   set_current_state(TASK_INTERRUPTIBLE);
>  
> @@ -1151,7 +1161,8 @@ static int drm_syncobj_array_wait(struct drm_device 
> *dev,
> struct drm_file *file_private,
> struct drm_syncobj_wait *wait,
> struct drm_syncobj_timeline_wait 
> *timeline_wait,
> -   struct drm_syncobj **syncobjs, bool timeline)
> +   struct drm_syncobj **syncobjs, bool timeline,
> +   ktime_t *deadline)
>  {
>   signed long timeout = 0;
>   uint32_t first = ~0;
> @@ -1162,7 +1173,8 @@ static int drm_syncobj_array_wait(struct drm_device 
> *dev,
>NULL,
>wait->count_handles,
>wait->flags,
> -  timeout, &first);
> +  timeout, &first,
> +  deadline);
>   if (timeout < 0)
>   return timeout;
>   wait->first_signaled = first;
> @@ -1172,7 +1184,8 @@ static int drm_syncobj_array_wait(struct drm_device 
> *dev,
>
> u64_to_user_ptr(timeline_wait->points),
>
> timeline_wait->count_handles,
>timeline_wait->flags,
> -  timeout, &first);
> +  timeout, &first,
> +  deadline);
>   if (timeout < 0)
>   return timeout;
>   timeline_wait->first_signaled = first;
> @@ -1243,13 +1256,20 @@ 

Re: [PATCH v11 3/9] drm/display: Add Type-C switch helpers

2023-02-20 Thread Pin-yen Lin
I think I accidentally used HTML mode for the previous email. Sorry about that.

On Mon, Feb 20, 2023 at 4:41 PM Pin-yen Lin  wrote:
>
> Hi Andi,
>
> Thanks for the review.
>
> On Wed, Feb 8, 2023 at 5:25 AM Andi Shyti  wrote:
>>
>> Hi Pin-yen,
>>
>> [...]
>>
>> > +static int drm_dp_register_mode_switch(struct device *dev,
>> > +struct fwnode_handle *fwnode,
>> > +struct drm_dp_typec_switch_desc 
>> > *switch_desc,
>> > +void *data, typec_mux_set_fn_t 
>> > mux_set)
>> > +{
>> > + struct drm_dp_typec_port_data *port_data;
>> > + struct typec_mux_desc mux_desc = {};
>> > + char name[32];
>> > + u32 port_num;
>> > + int ret;
>> > +
>> > + ret = fwnode_property_read_u32(fwnode, "reg", &port_num);
>> > + if (ret) {
>> > + dev_err(dev, "Failed to read reg property: %d\n", ret);
>> > + return ret;
>> > + }
>> > +
>> > + port_data = &switch_desc->typec_ports[port_num];
>> > + port_data->data = data;
>> > + port_data->port_num = port_num;
>> > + port_data->fwnode = fwnode;
>> > + mux_desc.fwnode = fwnode;
>> > + mux_desc.drvdata = port_data;
>> > + snprintf(name, sizeof(name), "%pfwP-%u", fwnode, port_num);
>> > + mux_desc.name = name;
>> > + mux_desc.set = mux_set;
>> > +
>> > + port_data->typec_mux = typec_mux_register(dev, &mux_desc);
>> > + if (IS_ERR(port_data->typec_mux)) {
>> > + ret = PTR_ERR(port_data->typec_mux);
>> > + dev_err(dev, "Mode switch register for port %d failed: %d\n",
>> > + port_num, ret);
>> > +
>> > + return ret;
>>
>> you don't need this return here...
>>
>> > + }
>> > +
>> > + return 0;
>>
>> Just "return ret;" here.

This was actually suggested by Angelo in [1]. I personally don't have
a strong opinion on either approach.

[1]https://lore.kernel.org/all/023519eb-0adb-3b08-71b9-afb92a6cc...@collabora.com/

Pin-yen
>>
>>
>> > +}
>> > +
>> > +/**
>> > + * drm_dp_register_typec_switches() - register Type-C switches
>> > + * @dev: Device that registers Type-C switches
>> > + * @port: Device node for the switch
>> > + * @switch_desc: A Type-C switch descriptor
>> > + * @data: Private data for the switches
>> > + * @mux_set: Callback function for typec_mux_set
>> > + *
>> > + * This function registers USB Type-C switches for DP bridges that can 
>> > switch
>> > + * the output signal between their output pins.
>> > + *
>> > + * Currently only mode switches are implemented, and the function assumes 
>> > the
>> > + * given @port device node has endpoints with "mode-switch" property.
>> > + * The port number is determined by the "reg" property of the endpoint.
>> > + */
>> > +int drm_dp_register_typec_switches(struct device *dev, struct 
>> > fwnode_handle *port,
>> > +struct drm_dp_typec_switch_desc 
>> > *switch_desc,
>> > +void *data, typec_mux_set_fn_t mux_set)
>> > +{
>> > + struct fwnode_handle *sw;
>> > + int ret;
>> > +
>> > + fwnode_for_each_child_node(port, sw) {
>> > + if (fwnode_property_present(sw, "mode-switch"))
>> > + switch_desc->num_typec_switches++;
>> > + }
>>
>> no need for brackets here
>>
>> > +
>> > + if (!switch_desc->num_typec_switches) {
>> > + dev_dbg(dev, "No Type-C switches node found\n");
>>
>> dev_warn()?
>
>
> I used dev_dbg here because the users might call this without checking if 
> there are mode switch endpoints present, and this is the case for the current 
> users (it6505 and anx7625). If we use dev_warn here, there will be warnings 
> every time even on use cases without Type-C switches.
>
> Thanks and regards,
> Pin-yen
>>
>>
>> > + return 0;
>> > + }
>> > +
>> > + switch_desc->typec_ports = devm_kcalloc(
>> > + dev, switch_desc->num_typec_switches,
>> > + sizeof(struct drm_dp_typec_port_data), GFP_KERNEL);
>> > +
>> > + if (!switch_desc->typec_ports)
>> > + return -ENOMEM;
>> > +
>> > + /* Register switches for each connector. */
>> > + fwnode_for_each_child_node(port, sw) {
>> > + if (!fwnode_property_present(sw, "mode-switch"))
>> > + continue;
>> > + ret = drm_dp_register_mode_switch(dev, sw, switch_desc, 
>> > data, mux_set);
>> > + if (ret)
>> > + goto err_unregister_typec_switches;
>> > + }
>> > +
>> > + return 0;
>> > +
>> > +err_unregister_typec_switches:
>> > + fwnode_handle_put(sw);
>> > + drm_dp_unregister_typec_switches(switch_desc);
>> > + dev_err(dev, "Failed to register mode switch: %d\n", ret);
>>
>> there is a bit of dmesg spamming. Please choose where you want to
>> print the error, either in this function or in
>> drm_dp_register_mode_switch().
>>
>> Andi
>>
>> > + return ret;
>> > +}
>> > 

Re: [PATCH v4 0/6] drm: lcdif: Add i.MX93 LCDIF support

2023-02-20 Thread Alexander Stein
Hi Liu,

On Friday, 17 February 2023 at 09:59:14 CET, Liu Ying wrote:
> On Fri, 2023-02-17 at 09:18 +0100, Alexander Stein wrote:
> > Hi Liu,
> 
> Hi Alexander,
> 
> > > On Friday, 17 February 2023 at 07:54:01 CET, Liu Ying wrote:
> > > Hi,
> > > 
> > > This patch set aims to add i.MX93 LCDIF display controller support
> > > in the existing LCDIF DRM driver.  The LCDIF embedded in i.MX93 SoC
> > > is essentially the same to those embedded in i.MX8mp SoC.  Through
> > > internal bridges, i.MX93 LCDIF may drive a MIPI DSI display or a LVDS
> > > display or a parallel display.
> > > 
> > > Patch 1/6 adds device tree binding support for i.MX93 LCDIF in the
> > > existing fsl,lcdif.yaml.
> > > 
> > > Patch 2/6 drops lcdif->bridge NULL pointer check as a cleanup patch.
> > > 
> > > Patch 3/6~5/6 prepare for adding i.MX93 LCDIF support step by step.
> > > 
> > > Patch 6/6 adds i.MX93 LCDIF compatible string as the last step of
> > > adding i.MX93 LCDIF support.
> > 
> > Thanks for the series. I could test this on my TQMa93xxLA/MBa93xxCA with a
> > single LVDS display attached, so no DSI or parallel display. Hence I could
> > not test the bus format and flags checks, but they look okay.
> > So you can add
> > Tested-by: Alexander Stein 
> > to the whole series as well.
> 
> Thanks for your test.
> 
> > One thing I noticed is that sometimes my system completely freezes before
> > probing lcdif. Adding some debug output, it seems there is some race
> > condition while powering up the IMX93_MEDIABLK_PD_LCDIF power domain. But
> > adding more detailed output made the problem go away. Did you notice
> > something similar? It might be a red herring though.
> I don't see system freezing with my i.MX93 11x11 EVK when probing
> lcdif. I did try to boot the system several times. All look ok. This is
> a snippet of dmesg when lcdif probes:
> 
> --8<--
> [0.753083] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
> [0.761669] SuperH (H)SCI(F) driver initialized
> [0.766523] msm_serial: driver initialized
> [0.780523] printk: console [ttyLP0] enabled0x44380010 (irq = 16,
> base_baud = 150) is a FSL_LPUART
> [0.780523] printk: console [ttyLP0] enabled
> [0.788928] printk: bootconsole [lpuart32] disabled
> [0.788928] printk: bootconsole [lpuart32] disabled
> [0.804632] panel-simple lvds_panel: supply power not found, using
> dummy regulator
> [0.814741] [drm] Initialized imx-lcdif 1.0.0 20220417 for
> 4ae3.lcd-controller on minor 0
> [1.195930] Console: switching to colour frame buffer device 160x50
> [1.218385] imx-lcdif 4ae3.lcd-controller: [drm] fb0: imx-
> lcdifdrmfb frame buffer device
> [1.227099] cacheinfo: Unable to detect cache hierarchy for CPU 0
> [1.236725] loop: module loaded
> --8<--
> 
> ~300 milliseconds are consumed by the enablement delay required by the
> "boe,ev121wxm-n10-1850" LVDS panel I use.

It seems you have the drivers compiled in. I use modules in my case and 
simple-panel as well. But this is unrelated, because lcdif_probe() is yet to 
be called. Using the debug diff from below I get the following output:

[   16.97] imx93-blk-ctrl 4ac1.system-controller: 
imx93_blk_ctrl_power_on: 1
[   16.122491] imx93-blk-ctrl 4ac1.system-controller: 
imx93_blk_ctrl_power_on: 2
[   16.137766] imx93-blk-ctrl 4ac1.system-controller: 
imx93_blk_ctrl_power_on: 3
[   16.154905] imx93-blk-ctrl 4ac1.system-controller: 
imx93_blk_ctrl_power_on: 4

It seems setting BLK_CLK_EN blocks the whole system; even reading is not
possible. I don't have any details on the hardware, but it seems that either
some clock or power domain is not enabled. This can also happen if I'm loading
the lcdif module manually after boot. But I can't detect any differences in
/sys/kernel/debug/clk/clk_summary.

---8<---
diff --git a/drivers/soc/imx/imx93-blk-ctrl.c b/drivers/soc/imx/imx93-blk-
ctrl.c
index 2c600329436cf..50aeb20ce90dc 100644
--- a/drivers/soc/imx/imx93-blk-ctrl.c
+++ b/drivers/soc/imx/imx93-blk-ctrl.c
@@ -129,12 +129,14 @@ static int imx93_blk_ctrl_power_on(struct 
generic_pm_domain *genpd)
struct imx93_blk_ctrl *bc = domain->bc;
int ret;
 
+   dev_info(bc->dev, "%s: 1\n", __func__);
ret = clk_bulk_prepare_enable(bc->num_clks, bc->clks);
if (ret) {
dev_err(bc->dev, "failed to enable bus clocks\n");
return ret;
}
 
+   dev_info(bc->dev, "%s: 2\n", __func__);
ret = clk_bulk_prepare_enable(data->num_clks, domain->clks);
if (ret) {
clk_bulk_disable_unprepare(bc->num_clks, bc->clks);
@@ -142,6 +144,7 @@ static int imx93_blk_ctrl_power_on(struct 
generic_pm_domain *genpd)
return ret;
}
 
+   dev_info(bc->dev, "%s: 3\n", __func__);
ret = 

Re: [PATCH v4 06/14] dma-buf/sync_file: Support (E)POLLPRI

2023-02-20 Thread Pekka Paalanen
On Sat, 18 Feb 2023 13:15:49 -0800
Rob Clark  wrote:

> From: Rob Clark 
> 
> Allow userspace to use the EPOLLPRI/POLLPRI flag to indicate an urgent
> wait (as opposed to a "housekeeping" wait to know when to cleanup after
> some work has completed).  Usermode components of GPU driver stacks
> often poll() on fence fd's to know when it is safe to do things like
> free or reuse a buffer, but they can also poll() on a fence fd when
> waiting to read back results from the GPU.  The EPOLLPRI/POLLPRI flag
> lets the kernel differentiate these two cases.
> 
> Signed-off-by: Rob Clark 

Hi,

where would the UAPI documentation of this go?
It seems to be missing.

If a Wayland compositor is polling application fences to know which
client buffer to use in its rendering, should the compositor poll with
PRI or not? If a compositor polls with PRI, then all fences from all
applications would always be PRI. Would that be harmful somehow or
would it be beneficial?
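
For reference, the userspace side of such an urgent wait is just poll(2) with
POLLPRI added to the event mask; a minimal sketch (fence_fd is assumed to be a
sync_file fd):

	#include <poll.h>

	/* Wait "urgently" on a fence fd: POLLPRI asks the kernel to treat
	 * this as a deadline of "now" per the patch above. */
	static int wait_fence_urgent(int fence_fd, int timeout_ms)
	{
		struct pollfd pfd = {
			.fd = fence_fd,
			.events = POLLIN | POLLPRI,
		};

		return poll(&pfd, 1, timeout_ms);
	}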


Thanks,
pq

> ---
>  drivers/dma-buf/sync_file.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
> index fb6ca1032885..c30b2085ee0a 100644
> --- a/drivers/dma-buf/sync_file.c
> +++ b/drivers/dma-buf/sync_file.c
> @@ -192,6 +192,14 @@ static __poll_t sync_file_poll(struct file *file, 
> poll_table *wait)
>  {
>   struct sync_file *sync_file = file->private_data;
>  
> + /*
> +  * The POLLPRI/EPOLLPRI flag can be used to signal that
> +  * userspace wants the fence to signal ASAP, express this
> +  * as an immediate deadline.
> +  */
> + if (poll_requested_events(wait) & EPOLLPRI)
> + dma_fence_set_deadline(sync_file->fence, ktime_get());
> +
>   poll_wait(file, &sync_file->wq, wait);
>  
>   if (list_empty(&sync_file->cb.node) &&





Re: [PATCH v4 05/14] dma-buf/sync_file: Add SET_DEADLINE ioctl

2023-02-20 Thread Pekka Paalanen
On Sat, 18 Feb 2023 13:15:48 -0800
Rob Clark  wrote:

> From: Rob Clark 
> 
> The initial purpose is for igt tests, but this would also be useful for
> compositors that wait until close to vblank deadline to make decisions
> about which frame to show.
> 
> The igt tests can be found at:
> 
> https://gitlab.freedesktop.org/robclark/igt-gpu-tools/-/commits/fence-deadline
> 
> v2: Clarify the timebase, add link to igt tests
> 
> Signed-off-by: Rob Clark 
> ---
>  drivers/dma-buf/sync_file.c| 19 +++
>  include/uapi/linux/sync_file.h | 22 ++
>  2 files changed, 41 insertions(+)
> 
> diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
> index af57799c86ce..fb6ca1032885 100644
> --- a/drivers/dma-buf/sync_file.c
> +++ b/drivers/dma-buf/sync_file.c
> @@ -350,6 +350,22 @@ static long sync_file_ioctl_fence_info(struct sync_file 
> *sync_file,
>   return ret;
>  }
>  
> +static int sync_file_ioctl_set_deadline(struct sync_file *sync_file,
> + unsigned long arg)
> +{
> + struct sync_set_deadline ts;
> +
> + if (copy_from_user(&ts, (void __user *)arg, sizeof(ts)))
> + return -EFAULT;
> +
> + if (ts.pad)
> + return -EINVAL;
> +
> + dma_fence_set_deadline(sync_file->fence, ktime_set(ts.tv_sec, 
> ts.tv_nsec));
> +
> + return 0;
> +}
> +
>  static long sync_file_ioctl(struct file *file, unsigned int cmd,
>   unsigned long arg)
>  {
> @@ -362,6 +378,9 @@ static long sync_file_ioctl(struct file *file, unsigned 
> int cmd,
>   case SYNC_IOC_FILE_INFO:
>   return sync_file_ioctl_fence_info(sync_file, arg);
>  
> + case SYNC_IOC_SET_DEADLINE:
> + return sync_file_ioctl_set_deadline(sync_file, arg);
> +
>   default:
>   return -ENOTTY;
>   }
> diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h
> index ee2dcfb3d660..c8666580816f 100644
> --- a/include/uapi/linux/sync_file.h
> +++ b/include/uapi/linux/sync_file.h
> @@ -67,6 +67,20 @@ struct sync_file_info {
>   __u64   sync_fence_info;
>  };
>  
> +/**
> + * struct sync_set_deadline - set a deadline on a fence
> + * @tv_sec:  seconds elapsed since epoch
> + * @tv_nsec: nanoseconds elapsed since the time given by the tv_sec

Hi,

should tv_sec,tv_nsec be validated like clock_settime() does?

It requires:
- tv_sec >= 0
- tv_nsec >= 0
- tv_nsec < 1e9
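
In code, the suggested check would be something like (a sketch, not in the
patch):

	if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= NSEC_PER_SEC)
		return -EINVAL;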


Thanks,
pq


> + * @pad: must be zero
> + *
> + * The timebase for the deadline is CLOCK_MONOTONIC (same as vblank)
> + */
> +struct sync_set_deadline {
> + __s64   tv_sec;
> + __s32   tv_nsec;
> + __u32   pad;
> +};
> +
>  #define SYNC_IOC_MAGIC   '>'
>  
>  /**
> @@ -95,4 +109,12 @@ struct sync_file_info {
>   */
>  #define SYNC_IOC_FILE_INFO   _IOWR(SYNC_IOC_MAGIC, 4, struct sync_file_info)
>  
> +
> +/**
> + * DOC: SYNC_IOC_SET_DEADLINE - set a deadline on a fence
> + *
> + * Allows userspace to set a deadline on a fence, see 
> dma_fence_set_deadline()
> + */
> +#define SYNC_IOC_SET_DEADLINE	_IOW(SYNC_IOC_MAGIC, 5, struct sync_set_deadline)
> +
>  #endif /* _UAPI_LINUX_SYNC_H */





Re: [PATCH v11 3/9] drm/display: Add Type-C switch helpers

2023-02-20 Thread Pin-yen Lin
Hi Andi,

Thanks for the review.

On Wed, Feb 8, 2023 at 5:25 AM Andi Shyti 
wrote:

> Hi Pin-yen,
>
> [...]
>
> > +static int drm_dp_register_mode_switch(struct device *dev,
> > +struct fwnode_handle *fwnode,
> > +struct drm_dp_typec_switch_desc
> *switch_desc,
> > +void *data, typec_mux_set_fn_t
> mux_set)
> > +{
> > + struct drm_dp_typec_port_data *port_data;
> > + struct typec_mux_desc mux_desc = {};
> > + char name[32];
> > + u32 port_num;
> > + int ret;
> > +
> > + ret = fwnode_property_read_u32(fwnode, "reg", &port_num);
> > + if (ret) {
> > + dev_err(dev, "Failed to read reg property: %d\n", ret);
> > + return ret;
> > + }
> > +
> > + port_data = &switch_desc->typec_ports[port_num];
> > + port_data->data = data;
> > + port_data->port_num = port_num;
> > + port_data->fwnode = fwnode;
> > + mux_desc.fwnode = fwnode;
> > + mux_desc.drvdata = port_data;
> > + snprintf(name, sizeof(name), "%pfwP-%u", fwnode, port_num);
> > + mux_desc.name = name;
> > + mux_desc.set = mux_set;
> > +
> > + port_data->typec_mux = typec_mux_register(dev, &mux_desc);
> > + if (IS_ERR(port_data->typec_mux)) {
> > + ret = PTR_ERR(port_data->typec_mux);
> > + dev_err(dev, "Mode switch register for port %d failed:
> %d\n",
> > + port_num, ret);
> > +
> > + return ret;
>
> you don't need this return here...
>
> > + }
> > +
> > + return 0;
>
> Just "return ret;" here.


> > +}
> > +
> > +/**
> > + * drm_dp_register_typec_switches() - register Type-C switches
> > + * @dev: Device that registers Type-C switches
> > + * @port: Device node for the switch
> > + * @switch_desc: A Type-C switch descriptor
> > + * @data: Private data for the switches
> > + * @mux_set: Callback function for typec_mux_set
> > + *
> > + * This function registers USB Type-C switches for DP bridges that can
> switch
> > + * the output signal between their output pins.
> > + *
> > + * Currently only mode switches are implemented, and the function
> assumes the
> > + * given @port device node has endpoints with "mode-switch" property.
> > + * The port number is determined by the "reg" property of the endpoint.
> > + */
> > +int drm_dp_register_typec_switches(struct device *dev, struct
> fwnode_handle *port,
> > +struct drm_dp_typec_switch_desc
> *switch_desc,
> > +void *data, typec_mux_set_fn_t mux_set)
> > +{
> > + struct fwnode_handle *sw;
> > + int ret;
> > +
> > + fwnode_for_each_child_node(port, sw) {
> > + if (fwnode_property_present(sw, "mode-switch"))
> > + switch_desc->num_typec_switches++;
> > + }
>
> no need for brackets here
>
> > +
> > + if (!switch_desc->num_typec_switches) {
> > + dev_dbg(dev, "No Type-C switches node found\n");
>
> dev_warn()?
>

I used dev_dbg here because the users might call this without checking if
there are mode switch endpoints present, and this is the case for the
current users (it6505 and anx7625). If we use dev_warn here, there will be
warnings every time even on use cases without Type-C switches.

Thanks and regards,
Pin-yen

>
> > + return 0;
> > + }
> > +
> > + switch_desc->typec_ports = devm_kcalloc(
> > + dev, switch_desc->num_typec_switches,
> > + sizeof(struct drm_dp_typec_port_data), GFP_KERNEL);
> > +
> > + if (!switch_desc->typec_ports)
> > + return -ENOMEM;
> > +
> > + /* Register switches for each connector. */
> > + fwnode_for_each_child_node(port, sw) {
> > + if (!fwnode_property_present(sw, "mode-switch"))
> > + continue;
> > + ret = drm_dp_register_mode_switch(dev, sw, switch_desc,
> data, mux_set);
> > + if (ret)
> > + goto err_unregister_typec_switches;
> > + }
> > +
> > + return 0;
> > +
> > +err_unregister_typec_switches:
> > + fwnode_handle_put(sw);
> > + drm_dp_unregister_typec_switches(switch_desc);
> > + dev_err(dev, "Failed to register mode switch: %d\n", ret);
>
> there is a bit of dmesg spamming. Please choose where you want to
> print the error, either in this function or in
> drm_dp_register_mode_switch().
>
> Andi
>
> > + return ret;
> > +}
> > +EXPORT_SYMBOL(drm_dp_register_typec_switches);
>
> [...]
>
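
For reference, a hypothetical caller sketch from a bridge driver (all names
below are placeholders, not taken from the patch):

	/* ctx is the bridge's private data; my_mux_set() implements
	 * typec_mux_set_fn_t. */
	ret = drm_dp_register_typec_switches(dev, of_fwnode_handle(port_np),
					     &ctx->switch_desc, ctx,
					     my_mux_set);
	if (ret)
		return ret;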


Re: [PATCH v3 0/2] Don't use stolen memory or BAR mappings for ring buffers

2023-02-20 Thread Tvrtko Ursulin



On 16/02/2023 01:10, john.c.harri...@intel.com wrote:

From: John Harrison 

Instruction from hardware arch is that stolen memory and BAR mappings
are unsafe for use as ring buffers. There can be issues with cache
aliasing due to the CPU access going to memory via the BAR. So, don't
do it.

v2: Don't use BAR mappings either.
Make conditional on LLC so as not to change platforms that don't need
to change (Daniele).
Add 'Fixes' tags (Tvrtko).
v3: Fix dumb typo.

Signed-off-by: John Harrison 


John Harrison (2):
   drm/i915: Don't use stolen memory for ring buffers with LLC
   drm/i915: Don't use BAR mappings for ring buffers with LLC

  drivers/gpu/drm/i915/gt/intel_ring.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)


It is doing what the problem statement laid out, so the series looks 
good to me.


Acked-by: Tvrtko Ursulin 

Regards,

Tvrtko




Re: [PATCH v2 2/2] drm: document DRM_IOCTL_PRIME_HANDLE_TO_FD and PRIME_FD_TO_HANDLE

2023-02-20 Thread Pekka Paalanen
On Fri, 17 Feb 2023 16:22:04 +
Simon Ser  wrote:

> v2: mention caps, note that the IOCTLs might fail, document that
> user-space needs a data structure to keep track of the
> handles (Daniel V.)
> 
> Signed-off-by: Simon Ser 
> Cc: Daniel Vetter 
> Cc: Pekka Paalanen 
> Cc: Daniel Stone 
> ---
>  include/uapi/drm/drm.h | 30 ++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 292e4778a2f4..a87ca2d4 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -1025,7 +1025,37 @@ extern "C" {
>  #define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock)
>  #define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock)
>  
> +/**
> + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD.
> + *
> + * User-space sets _prime_handle.handle with the GEM handle to export and
> + * _prime_handle.flags, and gets back a DMA-BUF file descriptor in
> + * _prime_handle.fd.
> + *
> + * The export can fail for any driver-specific reason, e.g. because export is
> + * not supported for this specific GEM handle (but might be for others).
> + *
> + * Support for exporting DMA-BUFs is advertised via _PRIME_CAP_EXPORT.
> + */
>  #define DRM_IOCTL_PRIME_HANDLE_TO_FD	DRM_IOWR(0x2d, struct drm_prime_handle)
> +/**
> + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle.
> + *
> + * User-space sets _prime_handle.fd with a DMA-BUF file descriptor to
> + * import, and gets back a GEM handle in _prime_handle.handle.
> + * _prime_handle.flags is unused.
> + *
> + * If an existing GEM handle refers to the memory object backing the DMA-BUF,
> + * that GEM handle is returned. Therefore user-space which needs to handle
> + * arbitrary DMA-BUFs must have a user-space lookup data structure to 
> manually
> + * reference-count duplicated GEM handles. For more information see
> + * _IOCTL_GEM_CLOSE.
> + *
> + * The import can fail for any driver-specific reason, e.g. because import is
> + * only supported for DMA-BUFs allocated on this DRM device.
> + *
> + * Support for importing DMA-BUFs is advertised via _PRIME_CAP_IMPORT.
> + */
>  #define DRM_IOCTL_PRIME_FD_TO_HANDLE	DRM_IOWR(0x2e, struct drm_prime_handle)
>  
>  #define DRM_IOCTL_AGP_ACQUIRE	DRM_IO(  0x30)
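
As an aside, the userspace bookkeeping the FD_TO_HANDLE documentation calls
for can be as small as a handle-to-refcount map; a sketch (not part of the
patch):

	/* FD_TO_HANDLE may return an already-known handle, so refcount
	 * each handle and only issue GEM_CLOSE on the last drop. */
	struct gem_handle_ref {
		uint32_t handle;
		unsigned int refcount;
	};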

This patch too

Acked-by: Pekka Paalanen 


Thanks,
pq




Re: [PATCH v4 06/14] dma-buf/sync_file: Support (E)POLLPRI

2023-02-20 Thread Christian König

On 18.02.23 22:15, Rob Clark wrote:

From: Rob Clark 

Allow userspace to use the EPOLLPRI/POLLPRI flag to indicate an urgent
wait (as opposed to a "housekeeping" wait to know when to cleanup after
some work has completed).  Usermode components of GPU driver stacks
often poll() on fence fd's to know when it is safe to do things like
free or reuse a buffer, but they can also poll() on a fence fd when
waiting to read back results from the GPU.  The EPOLLPRI/POLLPRI flag
lets the kernel differentiate these two cases.

Signed-off-by: Rob Clark 


The code looks clean, but the different poll flags and their meaning are 
certainly not my field of expertise.


Feel free to add Acked-by: Christian König , but
somebody with more background in this should probably take a look as well.


Regards,
Christian.


---
  drivers/dma-buf/sync_file.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index fb6ca1032885..c30b2085ee0a 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -192,6 +192,14 @@ static __poll_t sync_file_poll(struct file *file, poll_table *wait)
  {
struct sync_file *sync_file = file->private_data;
  
+	/*
+	 * The POLLPRI/EPOLLPRI flag can be used to signal that
+	 * userspace wants the fence to signal ASAP, express this
+	 * as an immediate deadline.
+	 */
+   if (poll_requested_events(wait) & EPOLLPRI)
+   dma_fence_set_deadline(sync_file->fence, ktime_get());
+
	poll_wait(file, &sync_file->wq, wait);
  
	if (list_empty(&sync_file->cb.node) &&
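
For reference, an "urgent" wait from user space would then look roughly like
the sketch below. The helper name is hypothetical, and fence_fd is assumed to
be a sync_file fd obtained from the driver.

#include <poll.h>

/* Block on a fence fd with POLLPRI set, which the kernel side above
 * turns into an immediate deadline on the fence. */
static int wait_fence_urgent(int fence_fd, int timeout_ms)
{
	struct pollfd pfd = {
		.fd = fence_fd,
		.events = POLLPRI | POLLIN,
	};

	/* returns > 0 once the fence has signaled */
	return poll(&pfd, 1, timeout_ms);
}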




Re: [PATCH v4 07/14] dma-buf/sw_sync: Add fence deadline support

2023-02-20 Thread Christian König

On 18.02.23 22:15, Rob Clark wrote:

From: Rob Clark 

This consists of simply storing the most recent deadline, and adding an
ioctl to retrieve the deadline.  This can be used in conjunction with
the SET_DEADLINE ioctl on a fence fd for testing.  Ie. create various
sw_sync fences, merge them into a fence-array, set deadline on the
fence-array and confirm that it is propagated properly to each fence.

Signed-off-by: Rob Clark 


Reviewed-by: Christian König 


---
  drivers/dma-buf/sw_sync.c| 58 
  drivers/dma-buf/sync_debug.h |  2 ++
  2 files changed, 60 insertions(+)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 348b3a9170fa..50f2638cccd3 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -52,12 +52,26 @@ struct sw_sync_create_fence_data {
__s32   fence; /* fd of new fence */
  };
  
+/**
+ * struct sw_sync_get_deadline - get the deadline of a sw_sync fence
+ * @tv_sec:	seconds elapsed since epoch (out)
+ * @tv_nsec:	nanoseconds elapsed since the time given by the tv_sec (out)
+ * @fence_fd:	the sw_sync fence fd (in)
+ */
+struct sw_sync_get_deadline {
+   __s64   tv_sec;
+   __s32   tv_nsec;
+   __s32   fence_fd;
+};
+
  #define SW_SYNC_IOC_MAGIC 'W'
  
  #define SW_SYNC_IOC_CREATE_FENCE	_IOWR(SW_SYNC_IOC_MAGIC, 0,\

struct sw_sync_create_fence_data)
  
  #define SW_SYNC_IOC_INC			_IOW(SW_SYNC_IOC_MAGIC, 1, __u32)

+#define SW_SYNC_GET_DEADLINE   _IOWR(SW_SYNC_IOC_MAGIC, 2, \
+   struct sw_sync_get_deadline)
  
  static const struct dma_fence_ops timeline_fence_ops;
  
@@ -171,6 +185,13 @@ static void timeline_fence_timeline_value_str(struct dma_fence *fence,

snprintf(str, size, "%d", parent->value);
  }
  
+static void timeline_fence_set_deadline(struct dma_fence *fence, ktime_t deadline)
+{
+   struct sync_pt *pt = dma_fence_to_sync_pt(fence);
+
+   pt->deadline = deadline;
+}
+
  static const struct dma_fence_ops timeline_fence_ops = {
.get_driver_name = timeline_fence_get_driver_name,
.get_timeline_name = timeline_fence_get_timeline_name,
@@ -179,6 +200,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
.release = timeline_fence_release,
.fence_value_str = timeline_fence_value_str,
.timeline_value_str = timeline_fence_timeline_value_str,
+   .set_deadline = timeline_fence_set_deadline,
  };
  
  /**

@@ -387,6 +409,39 @@ static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg)
return 0;
  }
  
+static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long arg)
+{
+   struct sw_sync_get_deadline data;
+   struct timespec64 ts;
+   struct dma_fence *fence;
+   struct sync_pt *pt;
+
+   if (copy_from_user(&data, (void __user *)arg, sizeof(data)))
+   return -EFAULT;
+
+   if (data.tv_sec || data.tv_nsec)
+   return -EINVAL;
+
+   fence = sync_file_get_fence(data.fence_fd);
+   if (!fence)
+   return -EINVAL;
+
+   pt = dma_fence_to_sync_pt(fence);
+   if (!pt) {
+       /* drop the reference taken by sync_file_get_fence() */
+       dma_fence_put(fence);
+       return -EINVAL;
+   }
+
+   ts = ktime_to_timespec64(pt->deadline);
+   data.tv_sec  = ts.tv_sec;
+   data.tv_nsec = ts.tv_nsec;
+
+   dma_fence_put(fence);
+
+   if (copy_to_user((void __user *)arg, &data, sizeof(data)))
+   return -EFAULT;
+
+   return 0;
+}
+
  static long sw_sync_ioctl(struct file *file, unsigned int cmd,
  unsigned long arg)
  {
@@ -399,6 +454,9 @@ static long sw_sync_ioctl(struct file *file, unsigned int cmd,
case SW_SYNC_IOC_INC:
return sw_sync_ioctl_inc(obj, arg);
  
+	case SW_SYNC_GET_DEADLINE:
+		return sw_sync_ioctl_get_deadline(obj, arg);
+
default:
return -ENOTTY;
}
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index 6176e52ba2d7..2e0146d0bdbb 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -55,11 +55,13 @@ static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence)
   * @base: base fence object
   * @link: link on the sync timeline's list
   * @node: node in the sync timeline's tree
+ * @deadline: the most recently set fence deadline
   */
  struct sync_pt {
struct dma_fence base;
struct list_head link;
struct rb_node node;
+   ktime_t deadline;
  };
  
  extern const struct file_operations sw_sync_debugfs_fops;
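
Putting the pieces together, the test flow sketched in the commit message
could look roughly like this from user space. This is a sketch under stated
assumptions: CONFIG_SW_SYNC with debugfs mounted, SYNC_IOC_SET_DEADLINE from
patch 05/14, local copies of the UAPI definitions quoted above, and no error
handling.

#include <fcntl.h>
#include <linux/types.h>
#include <string.h>
#include <sys/ioctl.h>
#include <time.h>
#include <unistd.h>

/* Local copies of the UAPI bits introduced in this series. */
struct sw_sync_create_fence_data { __u32 value; char name[32]; __s32 fence; };
struct sw_sync_get_deadline { __s64 tv_sec; __s32 tv_nsec; __s32 fence_fd; };
struct sync_set_deadline { __s64 tv_sec; __s32 tv_nsec; __u32 pad; };

#define SW_SYNC_IOC_CREATE_FENCE _IOWR('W', 0, struct sw_sync_create_fence_data)
#define SW_SYNC_GET_DEADLINE	 _IOWR('W', 2, struct sw_sync_get_deadline)
#define SYNC_IOC_SET_DEADLINE	 _IOW('>', 5, struct sync_set_deadline)

int main(void)
{
	struct sw_sync_create_fence_data create = { .value = 1 };
	struct sync_set_deadline set = { 0 };
	struct sw_sync_get_deadline get = { 0 };
	struct timespec now;
	int timeline, fence;

	/* Create a sw_sync timeline and a fence on it. */
	timeline = open("/sys/kernel/debug/sync/sw_sync", O_RDWR);
	strcpy(create.name, "deadline-test");
	ioctl(timeline, SW_SYNC_IOC_CREATE_FENCE, &create);
	fence = create.fence;

	/* Set a deadline ~16ms from now; the timebase is CLOCK_MONOTONIC. */
	clock_gettime(CLOCK_MONOTONIC, &now);
	set.tv_sec = now.tv_sec;
	set.tv_nsec = now.tv_nsec + 16 * 1000 * 1000;
	if (set.tv_nsec >= 1000000000) {
		set.tv_sec++;
		set.tv_nsec -= 1000000000;
	}
	ioctl(fence, SYNC_IOC_SET_DEADLINE, &set);

	/* Read the deadline back; tv_sec/tv_nsec must be zero on input. */
	get.fence_fd = fence;
	ioctl(timeline, SW_SYNC_GET_DEADLINE, &get);

	close(fence);
	close(timeline);
	return 0;
}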




Re: [PATCH v4 05/14] dma-buf/sync_file: Add SET_DEADLINE ioctl

2023-02-20 Thread Christian König

On 18.02.23 22:15, Rob Clark wrote:

From: Rob Clark 

The initial purpose is for igt tests, but this would also be useful for
compositors that wait until close to the vblank deadline to make decisions
about which frame to show.

The igt tests can be found at:

https://gitlab.freedesktop.org/robclark/igt-gpu-tools/-/commits/fence-deadline

v2: Clarify the timebase, add link to igt tests

Signed-off-by: Rob Clark 
---
  drivers/dma-buf/sync_file.c| 19 +++
  include/uapi/linux/sync_file.h | 22 ++
  2 files changed, 41 insertions(+)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86ce..fb6ca1032885 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -350,6 +350,22 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
return ret;
  }
  
+static int sync_file_ioctl_set_deadline(struct sync_file *sync_file,
+					unsigned long arg)
+{
+   struct sync_set_deadline ts;
+
+   if (copy_from_user(&ts, (void __user *)arg, sizeof(ts)))
+   return -EFAULT;
+
+   if (ts.pad)
+   return -EINVAL;
+
+   dma_fence_set_deadline(sync_file->fence, ktime_set(ts.tv_sec, ts.tv_nsec));
+
+   return 0;
+}
+
  static long sync_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
  {
@@ -362,6 +378,9 @@ static long sync_file_ioctl(struct file *file, unsigned int cmd,
case SYNC_IOC_FILE_INFO:
return sync_file_ioctl_fence_info(sync_file, arg);
  
+	case SYNC_IOC_SET_DEADLINE:
+		return sync_file_ioctl_set_deadline(sync_file, arg);
+
default:
return -ENOTTY;
}
diff --git a/include/uapi/linux/sync_file.h b/include/uapi/linux/sync_file.h
index ee2dcfb3d660..c8666580816f 100644
--- a/include/uapi/linux/sync_file.h
+++ b/include/uapi/linux/sync_file.h
@@ -67,6 +67,20 @@ struct sync_file_info {
__u64   sync_fence_info;
  };
  
+/**
+ * struct sync_set_deadline - set a deadline on a fence
+ * @tv_sec:	seconds elapsed since epoch
+ * @tv_nsec:	nanoseconds elapsed since the time given by the tv_sec
+ * @pad:	must be zero
+ *
+ * The timebase for the deadline is CLOCK_MONOTONIC (same as vblank)
+ */
+struct sync_set_deadline {
+   __s64   tv_sec;
+   __s32   tv_nsec;
+   __u32   pad;


IIRC struct timespec defined this as time_t/long (which is horrible for a
UAPI because of the sizeof(long) dependency); one possible alternative is
to use 64-bit nanoseconds from CLOCK_MONOTONIC (which is essentially ktime).


Not 100% sure if there is any preference documented, but I think the latter
might be better.


Either way, Acked-by: Christian König 
 for this patch.


Regards,
Christian.


+};
+
  #define SYNC_IOC_MAGIC	'>'
  
  /**

@@ -95,4 +109,12 @@ struct sync_file_info {
   */
  #define SYNC_IOC_FILE_INFO	_IOWR(SYNC_IOC_MAGIC, 4, struct sync_file_info)
  
+

+/**
+ * DOC: SYNC_IOC_SET_DEADLINE - set a deadline on a fence
+ *
+ * Allows userspace to set a deadline on a fence, see dma_fence_set_deadline()
+ */
+#define SYNC_IOC_SET_DEADLINE	_IOW(SYNC_IOC_MAGIC, 5, struct sync_set_deadline)
+
  #endif /* _UAPI_LINUX_SYNC_H */
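
As a sketch of the compositor use-case from the commit message: the helper
name is hypothetical, next_vblank (CLOCK_MONOTONIC) and fence_fd are assumed
inputs, and struct sync_set_deadline / SYNC_IOC_SET_DEADLINE are as defined
in the patch above.

#include <sys/ioctl.h>
#include <time.h>

/* Hint that the fence is needed by the next vblank before blocking on
 * it; pad must stay zero or the ioctl returns -EINVAL. */
static void hint_fence_deadline(int fence_fd, struct timespec next_vblank)
{
	struct sync_set_deadline arg = {
		.tv_sec  = next_vblank.tv_sec,
		.tv_nsec = next_vblank.tv_nsec,
		.pad     = 0,
	};

	ioctl(fence_fd, SYNC_IOC_SET_DEADLINE, &arg);
}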




Re: [PATCH v4 04/14] dma-buf/dma-resv: Add a way to set fence deadline

2023-02-20 Thread Christian König

On 18.02.23 22:15, Rob Clark wrote:

From: Rob Clark 

Add a way to set a deadline on remaining resv fences according to the
requested usage.

Signed-off-by: Rob Clark 
---
  drivers/dma-buf/dma-resv.c | 19 +++
  include/linux/dma-resv.h   |  2 ++
  2 files changed, 21 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 1c76aed8e262..0c86f6d577ab 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -684,6 +684,25 @@ long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
  }
  EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
  
+/**
+ * dma_resv_set_deadline - Set a deadline on reservation's objects fences
+ * @obj: the reservation object
+ * @usage: controls which fences to include, see enum dma_resv_usage.
+ * @deadline: the requested deadline (MONOTONIC)


Please add an additional description line, something like "Can be called 
without holding the dma_resv lock and sets @deadline on all fences 
filtered by @usage.".


With that done the patch is Reviewed-by: Christian König 



Regards,
Christian.


+ */
+void dma_resv_set_deadline(struct dma_resv *obj, enum dma_resv_usage usage,
+  ktime_t deadline)
+{
+   struct dma_resv_iter cursor;
+   struct dma_fence *fence;
+
+   dma_resv_iter_begin(&cursor, obj, usage);
+   dma_resv_for_each_fence_unlocked(&cursor, fence) {
+       dma_fence_set_deadline(fence, deadline);
+   }
+   dma_resv_iter_end(&cursor);
+}
+EXPORT_SYMBOL_GPL(dma_resv_set_deadline);
  
  /**

   * dma_resv_test_signaled - Test if a reservation object's fences have been
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 0637659a702c..8d0e34dad446 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -479,6 +479,8 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
  int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
  long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
   bool intr, unsigned long timeout);
+void dma_resv_set_deadline(struct dma_resv *obj, enum dma_resv_usage usage,
+  ktime_t deadline);
  bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage);
  void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq);
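
For context, a driver-side caller might look roughly like the sketch below.
This is not from the series itself: the helper name is hypothetical, and the
GEM object and vblank timestamp are assumed to come from the surrounding
driver.

#include <linux/dma-resv.h>
#include <drm/drm_gem.h>

/* Ask all write-usage fences on a framebuffer BO to try to signal by
 * the upcoming vblank; per the kerneldoc, the dma_resv lock does not
 * need to be held. */
static void flip_set_fence_deadline(struct drm_gem_object *obj,
				    ktime_t vblank_time)
{
	dma_resv_set_deadline(obj->resv, DMA_RESV_USAGE_WRITE, vblank_time);
}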
  




Re: [PATCH v2 0/3] Resolve warnings from AMDGPU

2023-02-20 Thread Christian König

Acked-by: Christian König  for the whole series.

On 17.02.23 19:14, Arthur Grillo wrote:

Hi,

This series resolves some of the warnings that appear when compiling AMDGPU
with W=1.

Each patch is focused on a specific warning.

This is my first patch for the GSoC project idea about increasing code
coverage of the DRM code[1].

Thanks for reviewing!

Best regards,
Arthur Grillo

[1]: https://www.x.org/wiki/DRMcoverage2023/#firstpatch

---

v1 -> v2: 
https://lore.kernel.org/all/20230213204923.111948-1-arthurgri...@riseup.net/

- Use dm_odm_combine_mode_disabled and dm_odm_combine_mode_2to1 instead of an
  enum cast
- Maintain register read

---

Arthur Grillo (3):
   drm/amd/display: Fix implicit enum conversion
   drm/amd/display: Remove unused local variables
   drm/amd/display: Remove unused local variables and function

  .../amd/display/dc/dcn10/dcn10_link_encoder.c |  3 +-
  .../drm/amd/display/dc/dcn201/dcn201_dpp.c|  7 
  .../drm/amd/display/dc/dcn201/dcn201_hwseq.c  |  2 -
  .../gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c |  2 -
  .../gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c |  4 --
  .../drm/amd/display/dc/dcn30/dcn30_hwseq.c|  3 --
  .../gpu/drm/amd/display/dc/dcn31/dcn31_apg.c  | 41 ---
  .../drm/amd/display/dc/dcn32/dcn32_resource.c |  5 +--
  .../display/dc/dcn32/dcn32_resource_helpers.c |  4 --
  .../dc/dml/dcn20/display_mode_vba_20.c|  9 ++--
  .../dc/dml/dcn20/display_mode_vba_20v2.c  | 11 ++---
  .../dc/dml/dcn21/display_mode_vba_21.c| 12 +++---
  .../dc/dml/dcn31/display_rq_dlg_calc_31.c |  2 -
  .../dc/link/protocols/link_dp_capability.c|  4 --
  14 files changed, 19 insertions(+), 90 deletions(-)




