[PATCH v2 2/4] dt-bindings: vendor-prefixes: Add an entry for SKOV A/S

2021-07-13 Thread Oleksij Rempel
Add "skov" entry for the SKOV A/S: https://www.skov.com/en/

Signed-off-by: Oleksij Rempel 
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml 
b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index fa951ba1c738..8ac59bf849fe 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -1060,6 +1060,8 @@ patternProperties:
 description: Silicon Integrated Systems Corp.
   "^sitronix,.*":
 description: Sitronix Technology Corporation
+  "^skov,.*":
+description: SKOV A/S
   "^skyworks,.*":
 description: Skyworks Solutions, Inc.
   "^smartlabs,.*":
-- 
2.30.2



[PATCH v2 0/4] Mainline imx6 based SKOV boards

2021-07-13 Thread Oleksij Rempel
changes v2:
- remove unnecessary newlines.
- change linux,wakeup to wakeup-source
- change switch@3 unit-address to @0
- sort aliases alphabetically

Mainline imx6 based DTs for SKOV A/S boards

Oleksij Rempel (3):
  dt-bindings: display: simple: add some Logic Technologies and
Multi-Inno panels
  dt-bindings: vendor-prefixes: Add an entry for SKOV A/S
  dt-bindings: arm: fsl: add SKOV imx6q and imx6dl based boards

Sam Ravnborg (1):
  ARM: dts: add SKOV imx6q and imx6dl based boards

 .../devicetree/bindings/arm/fsl.yaml  |   5 +
 .../bindings/display/panel/panel-simple.yaml  |   6 +
 .../devicetree/bindings/vendor-prefixes.yaml  |   2 +
 arch/arm/boot/dts/Makefile|   5 +
 arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts|  13 +
 arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts| 108 
 arch/arm/boot/dts/imx6q-skov-revc-lt2.dts |  36 ++
 arch/arm/boot/dts/imx6q-skov-revc-lt6.dts | 128 +
 .../dts/imx6q-skov-reve-mi1010ait-1cp1.dts| 127 +
 arch/arm/boot/dts/imx6qdl-skov-cpu-revc.dtsi  |  54 ++
 arch/arm/boot/dts/imx6qdl-skov-cpu.dtsi   | 475 ++
 11 files changed, 959 insertions(+)
 create mode 100644 arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts
 create mode 100644 arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts
 create mode 100644 arch/arm/boot/dts/imx6q-skov-revc-lt2.dts
 create mode 100644 arch/arm/boot/dts/imx6q-skov-revc-lt6.dts
 create mode 100644 arch/arm/boot/dts/imx6q-skov-reve-mi1010ait-1cp1.dts
 create mode 100644 arch/arm/boot/dts/imx6qdl-skov-cpu-revc.dtsi
 create mode 100644 arch/arm/boot/dts/imx6qdl-skov-cpu.dtsi

-- 
2.30.2



[PATCH v2 1/4] dt-bindings: display: simple: add some Logic Technologies and Multi-Inno panels

2021-07-13 Thread Oleksij Rempel
Add Logictechno and Multi-Inno panels:
- Logic Technologies LTTD800x480 L2RT 7" 800x480 TFT Resistive Touch Module
- Logic Technologies LTTD800480070-L6WH-RT 7” 800x480 TFT Resistive Touch Module
- Multi-Inno Technology Co.,Ltd MI1010AIT-1CP 10.1" 1280x800 LVDS IPS Cap Touch Mod.

Signed-off-by: Oleksij Rempel 
---
 .../devicetree/bindings/display/panel/panel-simple.yaml | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml 
b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
index b3797ba2698b..1e7b22aace63 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml
@@ -202,8 +202,14 @@ properties:
   - logictechno,lt161010-2nhr
 # Logic Technologies LT170410-2WHC 10.1" 1280x800 IPS TFT Cap Touch Mod.
   - logictechno,lt170410-2whc
+# Logic Technologies LTTD800x480 L2RT 7" 800x480 TFT Resistive Touch Module
+  - logictechno,lttd800480070-l2rt
+# Logic Technologies LTTD800480070-L6WH-RT 7” 800x480 TFT Resistive Touch Module
+  - logictechno,lttd800480070-l6wh-rt
 # Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel
   - mitsubishi,aa070mc01-ca1
+# Multi-Inno Technology Co.,Ltd MI1010AIT-1CP 10.1" 1280x800 LVDS IPS Cap Touch Mod.
+  - multi-inno,mi1010ait-1cp
 # NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel
   - nec,nl12880bc20-05
 # NEC LCD Technologies,Ltd. WQVGA TFT LCD panel
-- 
2.30.2



[PATCH v2 4/4] ARM: dts: add SKOV imx6q and imx6dl based boards

2021-07-13 Thread Oleksij Rempel
From: Sam Ravnborg 

Add SKOV imx6q/dl LT2, LT6 and mi1010ait-1cp1 boards.

Signed-off-by: Sam Ravnborg 
Signed-off-by: Søren Andersen 
Signed-off-by: Juergen Borleis 
Signed-off-by: Ulrich Ölmann 
Signed-off-by: Michael Grzeschik 
Signed-off-by: Marco Felsch 
Signed-off-by: Lucas Stach 
Signed-off-by: Oleksij Rempel 
---
 arch/arm/boot/dts/Makefile|   5 +
 arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts|  13 +
 arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts| 108 
 arch/arm/boot/dts/imx6q-skov-revc-lt2.dts |  36 ++
 arch/arm/boot/dts/imx6q-skov-revc-lt6.dts | 128 +
 .../dts/imx6q-skov-reve-mi1010ait-1cp1.dts| 127 +
 arch/arm/boot/dts/imx6qdl-skov-cpu-revc.dtsi  |  54 ++
 arch/arm/boot/dts/imx6qdl-skov-cpu.dtsi   | 475 ++
 8 files changed, 946 insertions(+)
 create mode 100644 arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts
 create mode 100644 arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts
 create mode 100644 arch/arm/boot/dts/imx6q-skov-revc-lt2.dts
 create mode 100644 arch/arm/boot/dts/imx6q-skov-revc-lt6.dts
 create mode 100644 arch/arm/boot/dts/imx6q-skov-reve-mi1010ait-1cp1.dts
 create mode 100644 arch/arm/boot/dts/imx6qdl-skov-cpu-revc.dtsi
 create mode 100644 arch/arm/boot/dts/imx6qdl-skov-cpu.dtsi

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index a2389b17026a..66aef5c6526d 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -473,6 +473,8 @@ dtb-$(CONFIG_SOC_IMX6Q) += \
imx6dl-sabrelite.dtb \
imx6dl-sabresd.dtb \
imx6dl-savageboard.dtb \
+   imx6dl-skov-revc-lt2.dtb \
+   imx6dl-skov-revc-lt6.dtb \
imx6dl-ts4900.dtb \
imx6dl-ts7970.dtb \
imx6dl-tx6dl-comtft.dtb \
@@ -573,6 +575,9 @@ dtb-$(CONFIG_SOC_IMX6Q) += \
imx6q-sabresd.dtb \
imx6q-savageboard.dtb \
imx6q-sbc6x.dtb \
+   imx6q-skov-revc-lt2.dtb \
+   imx6q-skov-revc-lt6.dtb \
+   imx6q-skov-reve-mi1010ait-1cp1.dtb \
imx6q-tbs2910.dtb \
imx6q-ts4900.dtb \
imx6q-ts7970.dtb \
diff --git a/arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts 
b/arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts
new file mode 100644
index ..667b8faa1807
--- /dev/null
+++ b/arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+//
+// Copyright (C) 2020 Pengutronix, Ulrich Oelmann 
+
+/dts-v1/;
+#include "imx6dl.dtsi"
+#include "imx6qdl-skov-cpu.dtsi"
+#include "imx6qdl-skov-cpu-revc.dtsi"
+
+/ {
+   model = "SKOV IMX6 CPU SoloCore";
+   compatible = "skov,imx6dl-skov-revc-lt2", "fsl,imx6dl";
+};
diff --git a/arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts 
b/arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts
new file mode 100644
index ..25071c7c4e29
--- /dev/null
+++ b/arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+//
+// Copyright (C) 2020 Pengutronix, Ulrich Oelmann 
+
+/dts-v1/;
+#include "imx6dl.dtsi"
+#include "imx6qdl-skov-cpu.dtsi"
+#include "imx6qdl-skov-cpu-revc.dtsi"
+
+/ {
+   model = "SKOV IMX6 CPU SoloCore";
+   compatible = "skov,imx6dl-skov-revc-lt6", "fsl,imx6dl";
+
+   backlight: backlight {
+   compatible = "pwm-backlight";
+   pinctrl-names = "default";
+   pinctrl-0 = <&pinctrl_backlight>;
+   enable-gpios = <&gpio6 23 GPIO_ACTIVE_LOW>;
+   pwms = < 0 2 0>;
+   brightness-levels = <0 255>;
+   num-interpolated-steps = <17>;
+   default-brightness-level = <8>;
+   power-supply = <_24v0>;
+   };
+
+   display {
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   compatible = "fsl,imx-parallel-display";
+   pinctrl-names = "default";
+   pinctrl-0 = <&pinctrl_ipu1>;
+
+   port@0 {
+   reg = <0>;
+
+   display0_in: endpoint {
+   remote-endpoint = <&ipu1_di0_disp0>;
+   };
+   };
+
+   port@1 {
+   reg = <1>;
+
+   display0_out: endpoint {
+   remote-endpoint = <&panel_in>;
+   };
+   };
+   };
+
+
+   panel {
+   compatible = "logictechno,lttd800480070-l6wh-rt";
+   backlight = <&backlight>;
+   power-supply = <_3v3>;
+
+   port {
+   panel_in: endpoint {
+   remote-endpoint = <&display0_out>;
+   };
+   };
+   };
+};
+
+&ipu1_di0_disp0 {
+   remote-endpoint = <&display0_in>;
+};
+
+&iomuxc {
+   pinctrl_backlight: backlightgrp {
+   fsl,pins = <
+   MX6QDL_PAD_RGMII_TD3__GPIO6_IO23    0x58
+   >;
+   };
+
+   pinctrl_ipu1: ipu1grp {
+   fsl,pins = <
+

[PATCH v2 3/4] dt-bindings: arm: fsl: add SKOV imx6q and imx6dl based boards

2021-07-13 Thread Oleksij Rempel
Add SKOV imx6q/dl LT2, LT6 and mi1010ait-1cp1 boards.

Signed-off-by: Oleksij Rempel 
---
 Documentation/devicetree/bindings/arm/fsl.yaml | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml 
b/Documentation/devicetree/bindings/arm/fsl.yaml
index 1c827c1954dc..7520af510f06 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -221,6 +221,9 @@ properties:
   - prt,prti6q# Protonic PRTI6Q board
   - prt,prtwd2# Protonic WD2 board
   - rex,imx6q-rex-pro # Rex Pro i.MX6 Quad Board
+  - skov,imx6q-skov-revc-lt2  # SKOV IMX6 CPU QuadCore lt2
+  - skov,imx6q-skov-revc-lt6  # SKOV IMX6 CPU QuadCore lt6
+  - skov,imx6q-skov-reve-mi1010ait-1cp1 # SKOV IMX6 CPU QuadCore mi1010ait-1cp1
   - solidrun,cubox-i/q# SolidRun Cubox-i Dual/Quad
   - solidrun,hummingboard/q
   - solidrun,hummingboard2/q
@@ -377,6 +380,8 @@ properties:
   - prt,prtvt7# Protonic VT7 board
   - rex,imx6dl-rex-basic  # Rex Basic i.MX6 Dual Lite Board
   - riot,imx6s-riotboard  # RIoTboard i.MX6S
+  - skov,imx6dl-skov-revc-lt2 # SKOV IMX6 CPU SoloCore lt2
+  - skov,imx6dl-skov-revc-lt6 # SKOV IMX6 CPU SoloCore lt6
   - solidrun,cubox-i/dl# SolidRun Cubox-i Solo/DualLite
   - solidrun,hummingboard/dl
   - solidrun,hummingboard2/dl  # SolidRun HummingBoard2 Solo/DualLite
-- 
2.30.2



[PATCH 4/4] drm/vmwgfx: Use 2.19 version number to recognize mks-stats ioctls

2021-07-13 Thread Zack Rusin
To let userspace recognize that it's running on top of a vmwgfx that
supports the mks-stats ioctls we need to bump the version number.
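As a rough userspace sketch (not part of this patch; it just assumes
libdrm's drmGetVersion() and a render node path), a client could gate
the mks-stats ioctls on the bumped version like this:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <xf86drm.h>

/* Returns 1 when the opened vmwgfx node advertises at least 2.19,
 * i.e. the mks-stats ioctls are available. */
static int vmw_has_mksstat(const char *path)
{
        int fd = open(path, O_RDWR);
        drmVersionPtr ver;
        int ret = 0;

        if (fd < 0)
                return 0;

        ver = drmGetVersion(fd);
        if (ver) {
                ret = ver->version_major == 2 && ver->version_minor >= 19;
                drmFreeVersion(ver);
        }
        close(fd);
        return ret;
}

int main(void)
{
        printf("mks-stats ioctls %savailable\n",
               vmw_has_mksstat("/dev/dri/renderD128") ? "" : "not ");
        return 0;
}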

Signed-off-by: Zack Rusin 
Reviewed-by: Martin Krastev 
Reviewed-by: Neha Bhende 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 46bf54f6169a..aaadda5f1b4a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -54,10 +54,10 @@
 
 
 #define VMWGFX_DRIVER_NAME "vmwgfx"
-#define VMWGFX_DRIVER_DATE "20210218"
+#define VMWGFX_DRIVER_DATE "20210713"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 18
-#define VMWGFX_DRIVER_PATCHLEVEL 1
+#define VMWGFX_DRIVER_MINOR 19
+#define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
 #define VMWGFX_MAX_VALIDATIONS 2048
-- 
2.30.2



[PATCH 2/4] drm/vmwgfx: Switch to using DRM_IOCTL_DEF_DRV

2021-07-13 Thread Zack Rusin
The core DRM macro has been accounting for DRM_COMMAND_BASE for a long
time now, so there's no reason to keep duplicating it. Plus, we were
leaving the ioctl name undefined, which meant that all the DRM ioctl
warnings/errors always listed a "null" ioctl as the culprit.

This fixes the undefined ioctl name and removes duplicated code.
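For reference, the core helper expands roughly like this (paraphrased
from include/drm/drm_ioctl.h, so double-check against the tree you are
building for); both the driver-relative indexing and the ioctl name
come for free:

/* Rough paraphrase of the core macro: it already subtracts
 * DRM_COMMAND_BASE when indexing the driver ioctl table and records
 * the ioctl name, so no private VMW_IOCTL_DEF copy is needed. */
#define DRM_IOCTL_DEF_DRV(ioctl, _func, _flags)                         \
        [DRM_IOCTL_NR(DRM_IOCTL_##ioctl) - DRM_COMMAND_BASE] = {        \
                .cmd = DRM_IOCTL_##ioctl,                               \
                .func = _func,                                          \
                .flags = _flags,                                        \
                .name = #ioctl                                          \
        }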

Signed-off-by: Zack Rusin 
Reviewed-by: Martin Krastev 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 176 +---
 1 file changed, 84 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 7d8cc2f6b04e..359f2e6f3693 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -159,110 +159,102 @@
DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_MKSSTAT_REMOVE,  \
struct drm_vmw_mksstat_remove_arg)
 
-/*
- * The core DRM version of this macro doesn't account for
- * DRM_COMMAND_BASE.
- */
-
-#define VMW_IOCTL_DEF(ioctl, func, flags) \
-  [DRM_IOCTL_NR(DRM_IOCTL_##ioctl) - DRM_COMMAND_BASE] = {DRM_IOCTL_##ioctl, 
flags, func}
-
 /*
  * Ioctl definitions.
  */
 
 static const struct drm_ioctl_desc vmw_ioctls[] = {
-   VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_bo_alloc_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_bo_unref_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_CURSOR_BYPASS,
- vmw_kms_cursor_bypass_ioctl,
- DRM_MASTER),
-
-   VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl,
- DRM_MASTER),
-   VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl,
- DRM_MASTER),
-   VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl,
- DRM_MASTER),
-
-   VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_CREATE_SURFACE, vmw_surface_define_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_REF_SURFACE, vmw_surface_reference_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_FENCE_SIGNALED,
- vmw_fence_obj_signaled_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_FENCE_EVENT, vmw_fence_event_ioctl,
- DRM_RENDER_ALLOW),
-   VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl,
- DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_GET_PARAM, vmw_getparam_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_ALLOC_DMABUF, vmw_bo_alloc_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_UNREF_DMABUF, vmw_bo_unref_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_CURSOR_BYPASS,
+ vmw_kms_cursor_bypass_ioctl,
+ DRM_MASTER),
+
+   DRM_IOCTL_DEF_DRV(VMW_CONTROL_STREAM, vmw_overlay_ioctl,
+ DRM_MASTER),
+   DRM_IOCTL_DEF_DRV(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl,
+ DRM_MASTER),
+   DRM_IOCTL_DEF_DRV(VMW_UNREF_STREAM, vmw_stream_unref_ioctl,
+ DRM_MASTER),
+
+   DRM_IOCTL_DEF_DRV(VMW_CREATE_CONTEXT, vmw_context_define_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_CREATE_SURFACE, vmw_surface_define_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_REF_SURFACE, vmw_surface_reference_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_EXECBUF, vmw_execbuf_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_FENCE_SIGNALED,
+ vmw_fence_obj_signaled_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
+ DRM_RENDER_ALLOW),
+   DRM_IOCTL_DEF_DRV(VMW_FENCE_EVENT, 

[PATCH 1/4] drm/vmwgfx: Add support for CursorMob and CursorBypass 4

2021-07-13 Thread Zack Rusin
From: Martin Krastev 

* Add support for CursorMob
* Add support for CursorBypass 4

Reviewed-by: Zack Rusin 
Signed-off-by: Martin Krastev 
Signed-off-by: Zack Rusin 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 45 +++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h |  6 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 79 +++--
 3 files changed, 125 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 086dc75e7b42..7d8cc2f6b04e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR MIT
 /**
  *
- * Copyright 2009-2016 VMware, Inc., Palo Alto, CA., USA
+ * Copyright 2009-2021 VMware, Inc., Palo Alto, CA., USA
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -301,8 +301,12 @@ static void vmw_print_capabilities2(uint32_t capabilities2)
DRM_INFO("  Grow oTable.\n");
if (capabilities2 & SVGA_CAP2_INTRA_SURFACE_COPY)
DRM_INFO("  IntraSurface copy.\n");
+   if (capabilities2 & SVGA_CAP2_CURSOR_MOB)
+   DRM_INFO("  Cursor Mob.\n");
if (capabilities2 & SVGA_CAP2_DX3)
DRM_INFO("  DX3.\n");
+   if (capabilities2 & SVGA_CAP2_EXTRA_REGS)
+   DRM_INFO("  Extra Regs.\n");
 }
 
 static void vmw_print_capabilities(uint32_t capabilities)
@@ -505,6 +509,7 @@ static int vmw_request_device_late(struct vmw_private 
*dev_priv)
 static int vmw_request_device(struct vmw_private *dev_priv)
 {
int ret;
+   size_t i;
 
ret = vmw_device_init(dev_priv);
if (unlikely(ret != 0)) {
@@ -526,6 +531,37 @@ static int vmw_request_device(struct vmw_private *dev_priv)
if (unlikely(ret != 0))
goto out_no_query_bo;
 
+   /* Set up mobs for cursor updates */
+   if (dev_priv->has_mob && dev_priv->capabilities2 & 
SVGA_CAP2_CURSOR_MOB) {
+   const uint32_t cursor_max_dim = vmw_read(dev_priv, 
SVGA_REG_CURSOR_MAX_DIMENSION);
+
+   for (i = 0; i < ARRAY_SIZE(dev_priv->cursor_mob); i++) {
+   struct ttm_buffer_object **const bo = 
_priv->cursor_mob[i];
+
+   ret = vmw_bo_create_kernel(dev_priv,
+   cursor_max_dim * cursor_max_dim * sizeof(u32) + 
sizeof(SVGAGBCursorHeader),
+   _mob_placement, bo);
+
+   if (ret != 0) {
+   DRM_ERROR("Unable to create CursorMob 
array.\n");
+   break;
+   }
+
+   BUG_ON((*bo)->resource->mem_type != VMW_PL_MOB);
+
+   /* Fence the mob creation so we are guaranteed to have the mob */
+   ret = ttm_bo_reserve(*bo, false, true, NULL);
+   BUG_ON(ret);
+
+   vmw_bo_fence_single(*bo, NULL);
+
+   ttm_bo_unreserve(*bo);
+
+   DRM_INFO("Using CursorMob mobid %lu, max dimension 
%u\n",
+(*bo)->resource->start, cursor_max_dim);
+   }
+   }
+
return 0;
 
 out_no_query_bo:
@@ -556,6 +592,8 @@ static int vmw_request_device(struct vmw_private *dev_priv)
  */
 static void vmw_release_device_early(struct vmw_private *dev_priv)
 {
+   size_t i;
+
/*
 * Previous destructions should've released
 * the pinned bo.
@@ -570,6 +608,11 @@ static void vmw_release_device_early(struct vmw_private 
*dev_priv)
if (dev_priv->has_mob) {
struct ttm_resource_manager *man;
 
+   for (i = 0; i < ARRAY_SIZE(dev_priv->cursor_mob); i++) {
+   if (dev_priv->cursor_mob[i] != NULL)
+   ttm_bo_put(dev_priv->cursor_mob[i]);
+   }
+
man = ttm_manager_type(_priv->bdev, VMW_PL_MOB);
ttm_resource_manager_evict_all(_priv->bdev, man);
vmw_otables_takedown(dev_priv);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 356f82c26f59..46bf54f6169a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -642,6 +642,12 @@ struct vmw_private {
u8 mksstat_kern_top_timer[MKSSTAT_CAPACITY];
atomic_t mksstat_kern_pids[MKSSTAT_CAPACITY];
 #endif
+
+   /*
+* CursorMob buffer objects
+*/
+   struct ttm_buffer_object *cursor_mob[2];
+   atomic_t cursor_mob_idx;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 2ddf4932d62c..8d7844354774 100644
--- 

[PATCH 3/4] drm/vmwgfx: Be a lot more flexible with MOB limits

2021-07-13 Thread Zack Rusin
The code was trying to keep a strict limit on the amount of mob
memory that was used in the guest by making it match the host
settings. There's technically no reason to do that (guests can
certainly use more than the host can have resident in renderers
at the same time).

In particular this is problematic because our userspace is not
great at handling OOM conditions, and running out of MOB space
results in GL apps crashing. For example, gnome-shell likes to allocate
huge surfaces (~61MB for the desktop on 2560x1600 with two workspaces),
and running out of memory there means that gnome-shell crashes
on startup, taking us back to the login screen and leaving a system
where one cannot log in graphically anymore.

Instead of letting userspace crash we can extend the available
MOB space; we just don't want to use all of the RAM for graphics,
so we're going to limit it to half of RAM.

With the addition of some extra logging this should make the
"guest has been configured with not enough graphics memory"
errors a lot easier to diagnose in cases where the automatic
expansion of MOB space fails.
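Condensed into a standalone sketch (illustration only; the real change
is the vmwgfx_gmrid_manager.c hunk below), the growth policy is:

#include <stdbool.h>
#include <stdint.h>

/* Double the soft limit on overflow, but never let graphics memory
 * grow beyond half of system RAM (all values are in pages). */
static bool try_grow_mob_limit(uint32_t *max_gmr_pages,
                               uint32_t used_gmr_pages,
                               unsigned long total_ram_pages)
{
        const unsigned long max_graphics_pages = total_ram_pages / 2;
        uint32_t new_max_pages;

        if (*max_gmr_pages > max_graphics_pages / 2)
                new_max_pages = max_graphics_pages; /* final cap: RAM / 2 */
        else
                new_max_pages = *max_gmr_pages * 2; /* otherwise just double */

        if (new_max_pages <= *max_gmr_pages || new_max_pages < used_gmr_pages)
                return false;                       /* really out of space */

        *max_gmr_pages = new_max_pages;
        return true;
}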

Signed-off-by: Zack Rusin 
Reviewed-by: Martin Krastev 
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |  8 -
 drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 36 +--
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 359f2e6f3693..a9195c472b75 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -948,6 +948,13 @@ static int vmw_driver_load(struct vmw_private *dev_priv, 
u32 pci_id)
dev_priv->texture_max_height = 8192;
dev_priv->max_primary_mem = dev_priv->vram_size;
}
+   DRM_INFO("Legacy memory limits: VRAM = %llu kB, FIFO = %llu kB, surface 
= %u kB\n",
+(u64)dev_priv->vram_size / 1024,
+(u64)dev_priv->fifo_mem_size / 1024,
+dev_priv->memory_size / 1024);
+
+   DRM_INFO("MOB limits: max mob size = %u kB, max mob pages = %u\n",
+dev_priv->max_mob_size / 1024, dev_priv->max_mob_pages);
 
vmw_print_capabilities(dev_priv->capabilities);
if (dev_priv->capabilities & SVGA_CAP_CAP2_REGISTER)
@@ -1094,7 +1101,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, 
u32 pci_id)
DRM_INFO("SM4_1 support available.\n");
if (dev_priv->sm_type == VMW_SM_4)
DRM_INFO("SM4 support available.\n");
-   DRM_INFO("Running without reservation semaphore\n");
 
vmw_host_printf("vmwgfx: Module Version: %d.%d.%d (kernel: %s)",
VMWGFX_DRIVER_MAJOR, VMWGFX_DRIVER_MINOR,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
index 28ceb749a733..b2c4af331c9d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
@@ -71,8 +71,40 @@ static int vmw_gmrid_man_get_node(struct 
ttm_resource_manager *man,
 
if (gman->max_gmr_pages > 0) {
gman->used_gmr_pages += (*res)->num_pages;
-   if (unlikely(gman->used_gmr_pages > gman->max_gmr_pages))
-   goto nospace;
+   /*
+* Because the graphics memory is a soft limit we can try to
+* expand it instead of letting the userspace apps crash.
+* We're just going to have a sane limit (half of RAM)
+* on the number of MOB's that we create and will try to keep
+* the system running until we reach that.
+*/
+   if (unlikely(gman->used_gmr_pages > gman->max_gmr_pages)) {
+   const unsigned long max_graphics_pages = 
totalram_pages() / 2;
+   uint32_t new_max_pages = 0;
+
+   DRM_WARN("vmwgfx: mob memory overflow. Consider 
increasing guest RAM and graphicsMemory.\n");
+   vmw_host_printf("vmwgfx, warning: mob memory overflow. 
Consider increasing guest RAM and graphicsMemory.\n");
+
+   if (gman->max_gmr_pages > (max_graphics_pages / 2)) {
+   DRM_WARN("vmwgfx: guest requires more than half 
of RAM for graphics.\n");
+   new_max_pages = max_graphics_pages;
+   } else
+   new_max_pages = gman->max_gmr_pages * 2;
+   if (new_max_pages > gman->max_gmr_pages && 
new_max_pages >= gman->used_gmr_pages) {
+   DRM_WARN("vmwgfx: increasing guest mob limits 
to %u kB.\n",
+((new_max_pages) << (PAGE_SHIFT - 
10)));
+
+   gman->max_gmr_pages = new_max_pages;
+   } else {
+   char buf[256];
+   snprintf(buf, 

Re: [PATCH v1 4/4] ARM: dts: add SKOV imx6q and imx6dl based boards

2021-07-13 Thread Shawn Guo
On Wed, Jun 09, 2021 at 02:10:50PM +0200, Oleksij Rempel wrote:
> From: Sam Ravnborg 
> 
> Add SKOV imx6q/dl LT2, LT6 and mi1010ait-1cp1 boards.
> 
> Signed-off-by: Sam Ravnborg 
> Signed-off-by: Søren Andersen 
> Signed-off-by: Juergen Borleis 
> Signed-off-by: Ulrich Ölmann 
> Signed-off-by: Michael Grzeschik 
> Signed-off-by: Marco Felsch 
> Signed-off-by: Lucas Stach 
> Signed-off-by: Oleksij Rempel 
> ---
>  arch/arm/boot/dts/Makefile|   5 +
>  arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts|  13 +
>  arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts| 108 
>  arch/arm/boot/dts/imx6q-skov-revc-lt2.dts |  36 ++
>  arch/arm/boot/dts/imx6q-skov-revc-lt6.dts | 128 +
>  .../dts/imx6q-skov-reve-mi1010ait-1cp1.dts| 127 +
>  arch/arm/boot/dts/imx6qdl-skov-cpu-revc.dtsi  |  58 +++
>  arch/arm/boot/dts/imx6qdl-skov-cpu.dtsi   | 476 ++
>  8 files changed, 951 insertions(+)
>  create mode 100644 arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts
>  create mode 100644 arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts
>  create mode 100644 arch/arm/boot/dts/imx6q-skov-revc-lt2.dts
>  create mode 100644 arch/arm/boot/dts/imx6q-skov-revc-lt6.dts
>  create mode 100644 arch/arm/boot/dts/imx6q-skov-reve-mi1010ait-1cp1.dts
>  create mode 100644 arch/arm/boot/dts/imx6qdl-skov-cpu-revc.dtsi
>  create mode 100644 arch/arm/boot/dts/imx6qdl-skov-cpu.dtsi
> 
> diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
> index f8f09c5066e7..60a3ef665697 100644
> --- a/arch/arm/boot/dts/Makefile
> +++ b/arch/arm/boot/dts/Makefile
> @@ -473,6 +473,8 @@ dtb-$(CONFIG_SOC_IMX6Q) += \
>   imx6dl-sabrelite.dtb \
>   imx6dl-sabresd.dtb \
>   imx6dl-savageboard.dtb \
> + imx6dl-skov-revc-lt2.dtb \
> + imx6dl-skov-revc-lt6.dtb \
>   imx6dl-ts4900.dtb \
>   imx6dl-ts7970.dtb \
>   imx6dl-tx6dl-comtft.dtb \
> @@ -567,6 +569,9 @@ dtb-$(CONFIG_SOC_IMX6Q) += \
>   imx6q-sabresd.dtb \
>   imx6q-savageboard.dtb \
>   imx6q-sbc6x.dtb \
> + imx6q-skov-revc-lt2.dtb \
> + imx6q-skov-revc-lt6.dtb \
> + imx6q-skov-reve-mi1010ait-1cp1.dtb \
>   imx6q-tbs2910.dtb \
>   imx6q-ts4900.dtb \
>   imx6q-ts7970.dtb \
> diff --git a/arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts 
> b/arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts
> new file mode 100644
> index ..667b8faa1807
> --- /dev/null
> +++ b/arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts
> @@ -0,0 +1,13 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR MIT)
> +//
> +// Copyright (C) 2020 Pengutronix, Ulrich Oelmann 
> +
> +/dts-v1/;
> +#include "imx6dl.dtsi"
> +#include "imx6qdl-skov-cpu.dtsi"
> +#include "imx6qdl-skov-cpu-revc.dtsi"
> +
> +/ {
> + model = "SKOV IMX6 CPU SoloCore";
> + compatible = "skov,imx6dl-skov-revc-lt2", "fsl,imx6dl";
> +};
> diff --git a/arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts 
> b/arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts
> new file mode 100644
> index ..25071c7c4e29
> --- /dev/null
> +++ b/arch/arm/boot/dts/imx6dl-skov-revc-lt6.dts
> @@ -0,0 +1,108 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR MIT)
> +//
> +// Copyright (C) 2020 Pengutronix, Ulrich Oelmann 
> +
> +/dts-v1/;
> +#include "imx6dl.dtsi"
> +#include "imx6qdl-skov-cpu.dtsi"
> +#include "imx6qdl-skov-cpu-revc.dtsi"
> +
> +/ {
> + model = "SKOV IMX6 CPU SoloCore";
> + compatible = "skov,imx6dl-skov-revc-lt6", "fsl,imx6dl";
> +
> + backlight: backlight {
> + compatible = "pwm-backlight";
> + pinctrl-names = "default";
> + pinctrl-0 = <_backlight>;
> + enable-gpios = < 23 GPIO_ACTIVE_LOW>;
> + pwms = < 0 2 0>;
> + brightness-levels = <0 255>;
> + num-interpolated-steps = <17>;
> + default-brightness-level = <8>;
> + power-supply = <_24v0>;
> + };
> +
> + display {
> + #address-cells = <1>;
> + #size-cells = <0>;
> +
> + compatible = "fsl,imx-parallel-display";
> + pinctrl-names = "default";
> + pinctrl-0 = <_ipu1>;
> +
> + port@0 {
> + reg = <0>;
> +
> + display0_in: endpoint {
> + remote-endpoint = <_di0_disp0>;
> + };
> + };
> +
> + port@1 {
> + reg = <1>;
> +
> + display0_out: endpoint {
> + remote-endpoint = <_in>;
> + };
> + };
> + };
> +
> +
> + panel {
> + compatible = "logictechno,lttd800480070-l6wh-rt";
> + backlight = <>;
> + power-supply = <_3v3>;
> +
> + port {
> + panel_in: endpoint {
> + remote-endpoint = <_out>;
> + };
> + };
> + };
> +};
> +
> +_di0_disp0 {
> + remote-endpoint = <_in>;
> +};
> +
> + {
> + 

Re: [PATCH 06/16] drm/i915/guc/slpc: Allocate, initialize and release slpc

2021-07-13 Thread Belgaumkar, Vinay




On 7/10/2021 9:05 AM, Michal Wajdeczko wrote:



On 10.07.2021 03:20, Vinay Belgaumkar wrote:

Allocate data structures for SLPC and add functions for
initializing it on the host side.

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c  | 11 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 36 -
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h | 20 
  3 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 9d61b2d54de4..82863a9bc8e8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -336,6 +336,12 @@ int intel_guc_init(struct intel_guc *guc)
goto err_ct;
}
  
+	if (intel_guc_slpc_is_used(guc)) {

+   ret = intel_guc_slpc_init(>slpc);
+   if (ret)
+   goto err_submission;
+   }
+
/* now that everything is perma-pinned, initialize the parameters */
guc_init_params(guc);
  
@@ -346,6 +352,8 @@ int intel_guc_init(struct intel_guc *guc)
  
  	return 0;
  
+err_submission:

+   intel_guc_submission_fini(guc);
  err_ct:
intel_guc_ct_fini(>ct);
  err_ads:
@@ -368,6 +376,9 @@ void intel_guc_fini(struct intel_guc *guc)
  
  	i915_ggtt_disable_guc(gt->ggtt);
  
+	if (intel_guc_slpc_is_used(guc))

+   intel_guc_slpc_fini(>slpc);
+
if (intel_guc_submission_is_used(guc))
intel_guc_submission_fini(guc);
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c

index c1f569d2300d..94e2f19951aa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -4,11 +4,41 @@
   * Copyright © 2020 Intel Corporation
   */
  
+#include 


hmm, what exactly is needed from this header ?


Was being used in a previous version for MSR reads, removed.




+
+#include "gt/intel_gt.h"
+#include "gt/intel_rps.h"
+
+#include "i915_drv.h"
  #include "intel_guc_slpc.h"
+#include "intel_pm.h"
+
+static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc)
+{
+   return container_of(slpc, struct intel_guc, slpc);
+}
+
+static int slpc_shared_data_init(struct intel_guc_slpc *slpc)
+{
+   struct intel_guc *guc = slpc_to_guc(slpc);
+   int err;
+   u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));


move err decl here


+
+   err = intel_guc_allocate_and_map_vma(guc, size, >vma, 
>vaddr);
+   if (unlikely(err)) {
+   DRM_ERROR("Failed to allocate slpc struct (err=%d)\n", err);


s/slpc/SLPC

and use drm_err instead
and you may also want to print error as %pe


added.




+   i915_vma_unpin_and_release(>vma, I915_VMA_RELEASE_MAP);


do you really need this ?


removed.



+   return err;
+   }
+
+   return err;
+}
  
  int intel_guc_slpc_init(struct intel_guc_slpc *slpc)

  {
-   return 0;
+   GEM_BUG_ON(slpc->vma);
+
+   return slpc_shared_data_init(slpc);
  }
  
  /*

@@ -31,4 +61,8 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
  
  void intel_guc_slpc_fini(struct intel_guc_slpc *slpc)

  {
+   if (!slpc->vma)
+   return;
+
+   i915_vma_unpin_and_release(>vma, I915_VMA_RELEASE_MAP);
  }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 98036459a1a3..a2643b904165 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -3,12 +3,32 @@
   *
   * Copyright © 2020 Intel Corporation
   */
+


should be fixed in earlier patch


  #ifndef _INTEL_GUC_SLPC_H_
  #define _INTEL_GUC_SLPC_H_
  
+#include 

  #include "intel_guc_slpc_fwif.h"
  
  struct intel_guc_slpc {

+   /*Protects access to vma and SLPC actions */


hmm, missing mutex ;)


Removed.




+   struct i915_vma *vma;
+   void *vaddr;


no need to be void, define it as ptr to slpc_shared_data


+
+   /* platform frequency limits */
+   u32 min_freq;
+   u32 rp0_freq;
+   u32 rp1_freq;
+
+   /* frequency softlimits */
+   u32 min_freq_softlimit;
+   u32 max_freq_softlimit;
+
+   struct {
+   u32 param_id;
+   u32 param_value;
+   u32 param_override;
+   } debug;


can you add all these extra fields in patches which will need them?

Michal


Done.

Thanks,
Vinay.




  };
  
  int intel_guc_slpc_init(struct intel_guc_slpc *slpc);






Re: [PATCH v2 2/2] drm/rockchip: dw_hdmi: add rk3568 support

2021-07-13 Thread Andy Yan

Hi Alex:

On 7/13/21 7:40 PM, Alex Bee wrote:

Hi Benjamin,

Am 07.07.21 um 14:03 schrieb Benjamin Gaignard:

Add a new dw_hdmi_plat_data struct and a new compatible for rk3568.
This version of the HDMI hardware block needs two clocks to provide
the phy reference clock: hclk_vio and hclk.

Signed-off-by: Benjamin Gaignard 
---
version 2:
- Add the clocks needed for the phy.


If I got Alega's comment correct, it wasn't about the hclks.
It looks like for this variant there is another reference clock
required (for the phy), like vpll already is (it looks like downstream
uses HPLL (= "HDMI-PLL"?) for that), which also has to switch its
frequency according to the drm mode rate - the two clocks you added
here just get enabled (and disabled).



Yes, it's HPLL, and the frequency of HPLL and the drm mode rate (vop
dclk) should be kept 1:1.
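In driver terms that usually boils down to something like the sketch
below (illustration only, the clock handle is hypothetical and not
part of this patch):

#include <linux/clk.h>
#include <drm/drm_modes.h>

/* Keep the phy reference clock (HPLL) at a 1:1 ratio with the VOP
 * dclk / drm mode clock whenever the mode changes. */
static int rk3568_hdmi_set_ref_clk(struct clk *hpll,
                                   const struct drm_display_mode *mode)
{
        /* mode->clock is in kHz, clk_set_rate() takes Hz */
        return clk_set_rate(hpll, mode->clock * 1000UL);
}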




Alega, Andy: Is it really required to enable hclk_vio and hclk(_vop)
in the hdmi driver? Are they required to be enabled for the other
output variants (i.e. mipi, dsi, rgb, ...) as well, and shouldn't they
rather be enabled in the (not-yet existing) vop2 driver?



hclk_vop should be enabled, otherwise you can't access hdmi registers.
This is only required for HDMI (mipi dsi, edp, rgb don't need it).




Overall: I'm not sure of the benefit of adding this hdmi variant for a 
SoC where the display driver isn't implemented upstream yet. The 
"VOP2" IP seems widely new and should probably be ported first. (even 
if the HDMI part seems a low hanging fruit according to the vendor 
sources)



Yes, the VOP2 IP is totally new and complicated. I have a plan to do
the upstreaming, but I am in a rush now, so please give me a little time.




Best,
Alex



  drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 68 +
  1 file changed, 68 insertions(+)

diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c 
b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c

index 830bdd5e9b7ce..dc0e255e45745 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -50,6 +50,10 @@
  #define RK3399_GRF_SOC_CON20    0x6250
  #define RK3399_HDMI_LCDC_SEL    BIT(6)
  +#define RK3568_GRF_VO_CON1    0x0364
+#define RK3568_HDMI_SDAIN_MSK    BIT(15)
+#define RK3568_HDMI_SCLIN_MSK    BIT(14)
+
  #define HIWORD_UPDATE(val, mask)    (val | (mask) << 16)
    /**
@@ -71,6 +75,8 @@ struct rockchip_hdmi {
  const struct rockchip_hdmi_chip_data *chip_data;
  struct clk *vpll_clk;
  struct clk *grf_clk;
+    struct clk *hclk_vio;
+    struct clk *hclk_vop;
  struct dw_hdmi *hdmi;
  struct phy *phy;
  };
@@ -216,6 +222,26 @@ static int rockchip_hdmi_parse_dt(struct 
rockchip_hdmi *hdmi)

  return PTR_ERR(hdmi->grf_clk);
  }
  +    hdmi->hclk_vio = devm_clk_get(hdmi->dev, "hclk_vio");
+    if (PTR_ERR(hdmi->hclk_vio) == -ENOENT) {
+    hdmi->hclk_vio = NULL;
+    } else if (PTR_ERR(hdmi->hclk_vio) == -EPROBE_DEFER) {
+    return -EPROBE_DEFER;
+    } else if (IS_ERR(hdmi->hclk_vio)) {
+    dev_err(hdmi->dev, "failed to get hclk_vio clock\n");
+    return PTR_ERR(hdmi->hclk_vio);
+    }
+
+    hdmi->hclk_vop = devm_clk_get(hdmi->dev, "hclk");
+    if (PTR_ERR(hdmi->hclk_vop) == -ENOENT) {
+    hdmi->hclk_vop = NULL;
+    } else if (PTR_ERR(hdmi->hclk_vop) == -EPROBE_DEFER) {
+    return -EPROBE_DEFER;
+    } else if (IS_ERR(hdmi->hclk_vop)) {
+    dev_err(hdmi->dev, "failed to get hclk_vop clock\n");
+    return PTR_ERR(hdmi->hclk_vop);
+    }
+
  return 0;
  }
  @@ -467,6 +493,19 @@ static const struct dw_hdmi_plat_data 
rk3399_hdmi_drv_data = {

  .use_drm_infoframe = true,
  };
  +static struct rockchip_hdmi_chip_data rk3568_chip_data = {
+    .lcdsel_grf_reg = -1,
+};
+
+static const struct dw_hdmi_plat_data rk3568_hdmi_drv_data = {
+    .mode_valid = dw_hdmi_rockchip_mode_valid,
+    .mpll_cfg   = rockchip_mpll_cfg,
+    .cur_ctr    = rockchip_cur_ctr,
+    .phy_config = rockchip_phy_config,
+    .phy_data = _chip_data,
+    .use_drm_infoframe = true,
+};
+
  static const struct of_device_id dw_hdmi_rockchip_dt_ids[] = {
  { .compatible = "rockchip,rk3228-dw-hdmi",
    .data = _hdmi_drv_data
@@ -480,6 +519,9 @@ static const struct of_device_id 
dw_hdmi_rockchip_dt_ids[] = {

  { .compatible = "rockchip,rk3399-dw-hdmi",
    .data = _hdmi_drv_data
  },
+    { .compatible = "rockchip,rk3568-dw-hdmi",
+  .data = _hdmi_drv_data
+    },
  {},
  };
  MODULE_DEVICE_TABLE(of, dw_hdmi_rockchip_dt_ids);
@@ -536,6 +578,28 @@ static int dw_hdmi_rockchip_bind(struct device 
*dev, struct device *master,

  return ret;
  }
  +    ret = clk_prepare_enable(hdmi->hclk_vio);
+    if (ret) {
+    dev_err(hdmi->dev, "Failed to enable HDMI hclk_vio: %d\n",
+    ret);
+    return ret;
+    }
+
+    ret = clk_prepare_enable(hdmi->hclk_vop);
+    if (ret) {
+    dev_err(hdmi->dev, "Failed to enable HDMI hclk_vop: %d\n",
+    

Re: [PATCH 05/16] drm/i915/guc/slpc: Adding slpc communication interfaces

2021-07-13 Thread Belgaumkar, Vinay




On 7/10/2021 8:52 AM, Michal Wajdeczko wrote:



On 10.07.2021 03:20, Vinay Belgaumkar wrote:

Replicate the SLPC header file in GuC for the most part. There are


what do you mean by "replicate" here?


some SLPC mode based parameters which haven't been included since
we are not using them.

Signed-off-by: Vinay Belgaumkar 
Signed-off-by: Sundaresan Sujaritha 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.c|   4 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |   2 +
  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |   2 +
  .../gpu/drm/i915/gt/uc/intel_guc_slpc_fwif.h  | 255 ++
  4 files changed, 263 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_fwif.h

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index b9a809f2d221..9d61b2d54de4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -202,11 +202,15 @@ static u32 guc_ctl_debug_flags(struct intel_guc *guc)
  
  static u32 guc_ctl_feature_flags(struct intel_guc *guc)

  {
+   struct intel_gt *gt = guc_to_gt(guc);
u32 flags = 0;
  
  	if (!intel_guc_submission_is_used(guc))

flags |= GUC_CTL_DISABLE_SCHEDULER;
  
+	if (intel_uc_uses_guc_slpc(>uc))

+   flags |= GUC_CTL_ENABLE_SLPC;
+
return flags;
  }
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h

index 94bb1ca6f889..19e2504d7a36 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -114,6 +114,8 @@
  #define   GUC_ADS_ADDR_SHIFT  1
  #define   GUC_ADS_ADDR_MASK   (0xF << GUC_ADS_ADDR_SHIFT)
  
+#define GUC_CTL_ENABLE_SLPCBIT(2)


this should be defined closer to GUC_CTL_FEATURE


done.




+
  #define GUC_CTL_MAX_DWORDS(SOFT_SCRATCH_COUNT - 2) /* [1..14] */
  
  /* Generic GT SysInfo data types */

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 74fd86769163..98036459a1a3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -6,6 +6,8 @@
  #ifndef _INTEL_GUC_SLPC_H_
  #define _INTEL_GUC_SLPC_H_
  
+#include "intel_guc_slpc_fwif.h"


doesn't seem to be needed right now


Removed for this patch.



+
  struct intel_guc_slpc {
  };
  
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_fwif.h

new file mode 100644
index ..2a5e71428374
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_fwif.h


I've started to move all pure ABI definitions to files in abi/ folder,
leaving in guc_fwif.h only our next level helpers/wrappers.

Can you move these SLPC definition there too ? maybe as dedicated:

abi/guc_slpc_abi.h


done.




@@ -0,0 +1,255 @@
+/*
+ * SPDX-License-Identifier: MIT


use proper format


+ *
+ * Copyright © 2020 Intel Corporation


2021


+ */
+#ifndef _INTEL_GUC_SLPC_FWIF_H_
+#define _INTEL_GUC_SLPC_FWIF_H_
+
+#include 
+
+/* This file replicates the header in GuC code for handling SLPC related
+ * data structures and sizes
+ */


use proper format for multi-line comments:

/*
 * blah blah
 * blah blah
 */


done.




+
+/* SLPC exposes certain parameters for global configuration by the host.
+ * These are referred to as override parameters, because in most cases
+ * the host will not need to modify the default values used by SLPC.
+ * SLPC remembers the default values which allows the host to easily restore
+ * them by simply unsetting the override. The host can set or unset override
+ * parameters during SLPC (re-)initialization using the SLPC Reset event.
+ * The host can also set or unset override parameters on the fly using the
+ * Parameter Set and Parameter Unset events
+ */
+#define SLPC_MAX_OVERRIDE_PARAMETERS   256
+#define SLPC_OVERRIDE_BITFIELD_SIZE \
+   (SLPC_MAX_OVERRIDE_PARAMETERS / 32)
+
+#define SLPC_PAGE_SIZE_BYTES   4096
+#define SLPC_CACHELINE_SIZE_BYTES  64
+#define SLPC_SHARE_DATA_SIZE_BYTE_HEADER   SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARE_DATA_SIZE_BYTE_PLATFORM_INFO
SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARE_DATA_SIZE_BYTE_TASK_STATE   SLPC_CACHELINE_SIZE_BYTES
+#define SLPC_SHARE_DATA_MODE_DEFN_TABLE_SIZE   SLPC_PAGE_SIZE_BYTES


can you put some simply diagram that would describe this layout ?


done for the shared data struct.




+
+#define SLPC_SHARE_DATA_SIZE_BYTE_MAX  (2 * SLPC_PAGE_SIZE_BYTES)
+
+/* Cacheline size aligned (Total size needed for
+ * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes)
+ */
+#define SLPC_SHARE_DATA_SIZE_BYTE_PARAM
(SLPC_MAX_OVERRIDE_PARAMETERS * 4) \
+   + 
((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \
+   + 

Re: [PATCH v3] drm/dp_mst: Fix return code on sideband message failure

2021-07-13 Thread khsieh

On 2021-07-07 01:37, Jani Nikula wrote:

On Tue, 06 Jul 2021, Kuogee Hsieh  wrote:

From: Rajkumar Subbiah 

Commit 2f015ec6eab6 ("drm/dp_mst: Add sideband down request tracing +
selftests") added some debug code for sideband message tracing. But
it seems to have unintentionally changed the behavior on sideband
message failure. It catches and returns failure only if DRM_UT_DP is
enabled. Otherwise it ignores the error code and returns success. So on
an MST unplug, the caller is unaware that the clear payload message
failed and ends up waiting for 4 seconds for the response. Fixes the
issue by returning the proper error code.

Changes in V2:
-- Revise commit text as review comment
-- add Fixes text

Changes in V3:
-- remove "unlikely" optimization

Fixes: 2f015ec6eab6 ("drm/dp_mst: Add sideband down request tracing + selftests")


Signed-off-by: Rajkumar Subbiah 
Signed-off-by: Kuogee Hsieh 

Reviewed-by: Stephen Boyd 


Reviewed-by: Jani Nikula 



---

Lyude,
Any comments from you?
Thanks,


 drivers/gpu/drm/drm_dp_mst_topology.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c

index 1590144..df91110 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2887,11 +2887,13 @@ static int process_single_tx_qlock(struct 
drm_dp_mst_topology_mgr *mgr,

idx += tosend + 1;

ret = drm_dp_send_sideband_msg(mgr, up, chunk, idx);
-   if (unlikely(ret) && drm_debug_enabled(DRM_UT_DP)) {
-   struct drm_printer p = drm_debug_printer(DBG_PREFIX);
+   if (ret) {
+   if (drm_debug_enabled(DRM_UT_DP)) {
+   struct drm_printer p = drm_debug_printer(DBG_PREFIX);

-   drm_printf(, "sideband msg failed to send\n");
-   drm_dp_mst_dump_sideband_msg_tx(, txmsg);
+   drm_printf(, "sideband msg failed to send\n");
+   drm_dp_mst_dump_sideband_msg_tx(, txmsg);
+   }
return ret;
}


Re: [PATCH v9 1/4] dt-bindings:drm/bridge:anx7625:add vendor define flags

2021-07-13 Thread Rob Herring
On Wed, Jul 07, 2021 at 03:30:51PM +0800, Xin Ji wrote:
> On Thu, Jun 24, 2021 at 01:57:22PM +0200, Robert Foss wrote:
> > Hey Xin,
> > 
> > I would like to merge this series now, but this patch needs a review
> > first. Maybe Laurent/Rob Herring are good candidates.
> > 
> > 
> > Rob.
> Hi Rob, I got Laurent's/Rob's comments before and explained why we need
> these DT properties; so far, I didn't get any response.

Do I have to go dig that up? If it was more than a week ago, assume I 
don't remember. This is 1 of 100 bindings a week.

Justify why this is needed in your commit message.

> Hi Rob Herring and Laurent, for the DT property lane0/1-swing, Google
> engineers have a strong demand for them. They don't want to move the DP
> swing adjustment into the kernel, as that may require changing the driver
> code in each project, so configuring them in DT is the best option.

Where's the ack from a Google engineer?

> 
> Thanks,
> Xin
> > 
> > On Tue, 22 Jun 2021 at 14:31, Xin Ji  wrote:
> > >
> > > Add 'bus-type' and 'data-lanes' define for port0. Define DP tx lane0,
> > > lane1 swing register array define, and audio enable flag.
> > >
> > > Signed-off-by: Xin Ji 
> > > ---
> > >  .../display/bridge/analogix,anx7625.yaml  | 57 ++-
> > >  1 file changed, 56 insertions(+), 1 deletion(-)
> > >
> > > diff --git 
> > > a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml 
> > > b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > index ab48ab2f4240..9e604d19a3d5 100644
> > > --- 
> > > a/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > +++ 
> > > b/Documentation/devicetree/bindings/display/bridge/analogix,anx7625.yaml
> > > @@ -43,6 +43,26 @@ properties:
> > >vdd33-supply:
> > >  description: Regulator that provides the supply 3.3V power.
> > >
> > > +  analogix,lane0-swing:
> > > +$ref: /schemas/types.yaml#/definitions/uint32-array
> > > +minItems: 1
> > > +maxItems: 20
> > > +description:
> > > +  an array of swing register setting for DP tx lane0 PHY, please 
> > > don't
> > > +  add this property, or contact vendor.

Why do we have the property if we're not supposed to add it?

> > > +
> > > +  analogix,lane1-swing:
> > > +$ref: /schemas/types.yaml#/definitions/uint32-array
> > > +minItems: 1
> > > +maxItems: 20
> > > +description:
> > > +  an array of swing register setting for DP tx lane1 PHY, please 
> > > don't
> > > +  add this property, or contact vendor.
> > > +
> > > +  analogix,audio-enable:
> > > +type: boolean
> > > +description: let the driver enable audio HDMI codec function or not.

Wouldn't we have a 'port' node if audio is to be enabled?

> > > +
> > >ports:
> > >  $ref: /schemas/graph.yaml#/properties/ports
> > >
> > > @@ -50,13 +70,43 @@ properties:
> > >port@0:
> > >  $ref: /schemas/graph.yaml#/properties/port
> > >  description:
> > > -  Video port for MIPI DSI input.
> > > +  MIPI DSI/DPI input.
> > > +
> > > +properties:
> > > +  endpoint:
> > > +$ref: /schemas/media/video-interfaces.yaml#
> > > +type: object
> > > +additionalProperties: false

Use 'unevaluatedProperties: false' instead...

> > > +
> > > +properties:
> > > +  remote-endpoint: true

...And drop this.

> > > +  bus-type: true

This device supports all the possible bus types? What's the default as 
it is not required?

> > > +  data-lanes: true

And up to 8 lanes? 

> > > +
> > > +required:
> > > +  - remote-endpoint
> > > +
> > > +required:
> > > +  - endpoint

You can drop both 'required'.

> > > +
> > >
> > >port@1:
> > >  $ref: /schemas/graph.yaml#/properties/port
> > >  description:
> > >Video port for panel or connector.
> > >
> > > +properties:
> > > +  endpoint:
> > > +$ref: /schemas/media/video-interfaces.yaml#

Doesn't look like anything from video-interfaces.yaml is used. This 
whole chunk is not needed.

> > > +type: object
> > > +additionalProperties: false
> > > +
> > > +properties:
> > > +  remote-endpoint: true
> > > +
> > > +required:
> > > +  - remote-endpoint
> > > +
> > >  required:
> > >- port@0
> > >- port@1
> > > @@ -87,6 +137,9 @@ examples:
> > >  vdd10-supply = <_mipibrdg>;
> > >  vdd18-supply = <_mipibrdg>;
> > >  vdd33-supply = <_mipibrdg>;
> > > +analogix,audio-enable;
> > > +analogix,lane0-swing = <0x14 0x54 0x64 0x74 0x29 0x7b 0x77 
> > > 0x5b>;
> > > +analogix,lane1-swing = <0x14 0x54 0x64 0x74 0x29 0x7b 0x77 
> > > 0x5b>;
> > >
> > >  ports {
> > >  #address-cells = <1>;
> > > @@ -96,6 +149,8 @@ examples:
> > > 

[PATCH v4 4/4] drm/vgem: use shmem helpers

2021-07-13 Thread Daniel Vetter
Aside from deleting lots of code the real motivation here is to switch
the mmap over to VM_PFNMAP, to be more consistent with what real gpu
drivers do. They're all VM_PFNMAP, which means get_user_pages doesn't
work, and even if you try and there's a struct page behind that,
touching it and mucking around with its refcount can upset drivers
real bad.

v2: Review from Thomas:
- sort #include
- drop more dead code that I didn't spot somehow

v3: select DRM_GEM_SHMEM_HELPER to make it build (intel-gfx-ci)

v4: I got tricked by 0cf2ef46c6c0 ("drm/shmem-helper: Use cached
mappings by default"), and we need WC in vgem because vgem doesn't
have explicit begin/end cpu access ioctls.

Also add a comment why exactly vgem has to use wc.

v5: Don't set obj->base.funcs, it will default to drm_gem_shmem_funcs
(Thomas)

v6: vgem also needs an MMU for remapping

Cc: Thomas Zimmermann 
Acked-by: Thomas Zimmermann 
Cc: John Stultz 
Cc: Sumit Semwal 
Cc: "Christian König" 
Signed-off-by: Daniel Vetter 
Cc: Melissa Wen 
Cc: Chris Wilson 
---
 drivers/gpu/drm/Kconfig |   5 +-
 drivers/gpu/drm/vgem/vgem_drv.c | 315 ++--
 2 files changed, 15 insertions(+), 305 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 314eefa39892..28f7d2006e8b 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -272,7 +272,8 @@ source "drivers/gpu/drm/kmb/Kconfig"
 
 config DRM_VGEM
tristate "Virtual GEM provider"
-   depends on DRM
+   depends on DRM && MMU
+   select DRM_GEM_SHMEM_HELPER
help
  Choose this option to get a virtual graphics memory manager,
  as used by Mesa's software renderer for enhanced performance.
@@ -280,7 +281,7 @@ config DRM_VGEM
 
 config DRM_VKMS
tristate "Virtual KMS (EXPERIMENTAL)"
-   depends on DRM
+   depends on DRM && MMU
select DRM_KMS_HELPER
select DRM_GEM_SHMEM_HELPER
select CRC32
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index bf38a7e319d1..ba410ba6b7f7 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -38,6 +38,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -50,87 +51,11 @@
 #define DRIVER_MAJOR   1
 #define DRIVER_MINOR   0
 
-static const struct drm_gem_object_funcs vgem_gem_object_funcs;
-
 static struct vgem_device {
struct drm_device drm;
struct platform_device *platform;
 } *vgem_device;
 
-static void vgem_gem_free_object(struct drm_gem_object *obj)
-{
-   struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
-
-   kvfree(vgem_obj->pages);
-   mutex_destroy(_obj->pages_lock);
-
-   if (obj->import_attach)
-   drm_prime_gem_destroy(obj, vgem_obj->table);
-
-   drm_gem_object_release(obj);
-   kfree(vgem_obj);
-}
-
-static vm_fault_t vgem_gem_fault(struct vm_fault *vmf)
-{
-   struct vm_area_struct *vma = vmf->vma;
-   struct drm_vgem_gem_object *obj = vma->vm_private_data;
-   /* We don't use vmf->pgoff since that has the fake offset */
-   unsigned long vaddr = vmf->address;
-   vm_fault_t ret = VM_FAULT_SIGBUS;
-   loff_t num_pages;
-   pgoff_t page_offset;
-   page_offset = (vaddr - vma->vm_start) >> PAGE_SHIFT;
-
-   num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
-
-   if (page_offset >= num_pages)
-   return VM_FAULT_SIGBUS;
-
-   mutex_lock(>pages_lock);
-   if (obj->pages) {
-   get_page(obj->pages[page_offset]);
-   vmf->page = obj->pages[page_offset];
-   ret = 0;
-   }
-   mutex_unlock(>pages_lock);
-   if (ret) {
-   struct page *page;
-
-   page = shmem_read_mapping_page(
-   file_inode(obj->base.filp)->i_mapping,
-   page_offset);
-   if (!IS_ERR(page)) {
-   vmf->page = page;
-   ret = 0;
-   } else switch (PTR_ERR(page)) {
-   case -ENOSPC:
-   case -ENOMEM:
-   ret = VM_FAULT_OOM;
-   break;
-   case -EBUSY:
-   ret = VM_FAULT_RETRY;
-   break;
-   case -EFAULT:
-   case -EINVAL:
-   ret = VM_FAULT_SIGBUS;
-   break;
-   default:
-   WARN_ON(PTR_ERR(page));
-   ret = VM_FAULT_SIGBUS;
-   break;
-   }
-
-   }
-   return ret;
-}
-
-static const struct vm_operations_struct vgem_gem_vm_ops = {
-   .fault = vgem_gem_fault,
-   .open = drm_gem_vm_open,
-   .close = drm_gem_vm_close,
-};
-
 static int 

[PATCH v4 1/4] dma-buf: Require VM_PFNMAP vma for mmap

2021-07-13 Thread Daniel Vetter
tldr; DMA buffers aren't normal memory, expecting that you can use
them like that (like calling get_user_pages works, or that they're
accounted like any other normal memory) cannot be guaranteed.

Since some userspace only runs on integrated devices, where all
buffers are actually all resident system memory, there's a huge
temptation to assume that a struct page is always present and useable
like for any more pagecache backed mmap. This has the potential to
result in a uapi nightmare.

To stop this gap require that DMA buffer mmaps are VM_PFNMAP, which
blocks get_user_pages and all the other struct page based
infrastructure for everyone. In spirit this is the uapi counterpart to
the kernel-internal CONFIG_DMABUF_DEBUG.

Motivated by a recent patch which wanted to switch the system dma-buf
heap to vm_insert_page instead of vm_insert_pfn.

v2:

Jason brought up that we also want to guarantee that all ptes have the
pte_special flag set, to catch fast get_user_pages (on architectures
that support this). Allowing VM_MIXEDMAP (like VM_SPECIAL does) would
still allow vm_insert_page, but limiting to VM_PFNMAP will catch that.

From auditing the various functions to insert pfn pte entries
(vm_insert_pfn_prot, remap_pfn_range and all it's callers like
dma_mmap_wc) it looks like VM_PFNMAP is already required anyway, so
this should be the correct flag to check for.
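For exporters the practical consequence is that the mmap callback has
to build a pfn-based mapping; a minimal sketch (hypothetical exporter,
not from this series) that ends up with VM_PFNMAP set:

#include <linux/dma-buf.h>
#include <linux/mm.h>

/* Illustration-only exporter state: a physically contiguous buffer. */
struct my_buffer {
        phys_addr_t paddr;
        size_t size;
};

/* Sketch of a dma_buf_ops.mmap callback that passes the new check:
 * remap_pfn_range() itself marks the vma VM_IO | VM_PFNMAP, so no
 * struct-page based get_user_pages() can be done on the mapping. */
static int my_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
        struct my_buffer *buf = dmabuf->priv;

        if (vma->vm_end - vma->vm_start > buf->size)
                return -EINVAL;

        vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

        return remap_pfn_range(vma, vma->vm_start,
                               (buf->paddr >> PAGE_SHIFT) + vma->vm_pgoff,
                               vma->vm_end - vma->vm_start,
                               vma->vm_page_prot);
}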

References: 
https://lore.kernel.org/lkml/cakmk7uhi+mg0z0humnt13qccvuturvjpcr0njrl12k-wbwz...@mail.gmail.com/
Acked-by: Christian König 
Cc: Jason Gunthorpe 
Cc: Suren Baghdasaryan 
Cc: Matthew Wilcox 
Cc: John Stultz 
Signed-off-by: Daniel Vetter 
Cc: Sumit Semwal 
Cc: "Christian König" 
Cc: linux-me...@vger.kernel.org
Cc: linaro-mm-...@lists.linaro.org
--
Resending this so I can test the next two patches for vgem/shmem in
intel-gfx-ci. Last round failed somehow, but I can't repro that at all
locally here.

No immediate plans to merge this patch here since ttm isn't addressed
yet (and there we have the hugepte issue, for which I don't think we
have a clear consensus yet).
-Daniel
---
 drivers/dma-buf/dma-buf.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 510b42771974..65cbd7f0f16a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -130,6 +130,7 @@ static struct file_system_type dma_buf_fs_type = {
 static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
 {
struct dma_buf *dmabuf;
+   int ret;
 
if (!is_dma_buf_file(file))
return -EINVAL;
@@ -145,7 +146,11 @@ static int dma_buf_mmap_internal(struct file *file, struct 
vm_area_struct *vma)
dmabuf->size >> PAGE_SHIFT)
return -EINVAL;
 
-   return dmabuf->ops->mmap(dmabuf, vma);
+   ret = dmabuf->ops->mmap(dmabuf, vma);
+
+   WARN_ON(!(vma->vm_flags & VM_PFNMAP));
+
+   return ret;
 }
 
 static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
@@ -1276,6 +1281,8 @@ EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
 int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 unsigned long pgoff)
 {
+   int ret;
+
if (WARN_ON(!dmabuf || !vma))
return -EINVAL;
 
@@ -1296,7 +1303,11 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct 
vm_area_struct *vma,
vma_set_file(vma, dmabuf->file);
vma->vm_pgoff = pgoff;
 
-   return dmabuf->ops->mmap(dmabuf, vma);
+   ret = dmabuf->ops->mmap(dmabuf, vma);
+
+   WARN_ON(!(vma->vm_flags & VM_PFNMAP));
+
+   return ret;
 }
 EXPORT_SYMBOL_GPL(dma_buf_mmap);
 
-- 
2.32.0



[PATCH v4 3/4] drm/shmem-helpers: Allocate wc pages on x86

2021-07-13 Thread Daniel Vetter
intel-gfx-ci realized that something is not quite coherent anymore on
some platforms for our i915+vgem tests, when I tried to switch vgem
over to shmem helpers.

After lots of head-scratching I realized that I've removed calls to
drm_clflush. And we need those. To make this a bit cleaner use the
same page allocation tooling as ttm, which does internally clflush
(and more, as needed on any platform instead of just the intel x86
cpus i915 can be combined with).

Unfortunately this doesn't exist on arm, or as a generic feature. For
that I think only the dma-api can get at wc memory reliably, so maybe
we'd need some kind of GFP_WC flag to do this properly.
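
For completeness, a very rough sketch of the dma-api route mentioned
above (purely illustrative, not what this patch does): dma_alloc_wc()
hands back write-combined memory on arm too, but ties the allocation to
a struct device and doesn't give you a page array.

	/* illustrative only: assumes a suitable struct device *dev and a size */
	dma_addr_t dma_handle;
	void *vaddr;

	vaddr = dma_alloc_wc(dev, size, &dma_handle, GFP_KERNEL);
	if (!vaddr)
		return -ENOMEM;
	/* ... use vaddr and dma_handle, no struct pages involved ... */
	dma_free_wc(dev, size, vaddr, dma_handle);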

Signed-off-by: Daniel Vetter 
Cc: Christian König 
Cc: "Thomas Hellström" 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 296ab1b7c07f..657d2490aaa5 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -10,6 +10,10 @@
 #include 
 #include 
 
+#ifdef CONFIG_X86
+#include 
+#endif
+
 #include 
 #include 
 #include 
@@ -162,6 +166,11 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem)
return PTR_ERR(pages);
}
 
+#ifdef CONFIG_X86
+   if (shmem->map_wc)
+   set_pages_array_wc(pages, obj->size >> PAGE_SHIFT);
+#endif
+
shmem->pages = pages;
 
return 0;
@@ -203,6 +212,11 @@ static void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem)
if (--shmem->pages_use_count > 0)
return;
 
+#ifdef CONFIG_X86
+   if (shmem->map_wc)
+   set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT);
+#endif
+
drm_gem_put_pages(obj, shmem->pages,
  shmem->pages_mark_dirty_on_put,
  shmem->pages_mark_accessed_on_put);
-- 
2.32.0



[PATCH v4 2/4] drm/shmem-helper: Switch to vmf_insert_pfn

2021-07-13 Thread Daniel Vetter
We want to stop gup; that doesn't work if we use vmf_insert_page
and VM_MIXEDMAP, because that combination does not set pte_special.

v2: With this shmem gem helpers now definitely need CONFIG_MMU (0day)

v3: add more depends on MMU. For usb drivers this is a bit awkward,
but really it's correct: To be able to provide a contig mapping of
buffers to userspace on !MMU platforms we'd need to use the cma
helpers for these drivers on those platforms. As-is this won't work.

Also not exactly sure why vm_insert_page doesn't go boom, because that
definitely won't fly in practice since the pages are non-contig to
begin with.
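
For background, a heavily simplified sketch of the relevant fast gup
check (not the literal code from mm/gup.c): the fast path bails out on
special ptes, which is exactly what vmf_insert_pfn gives us, while
vm_insert_page does not.

	/* simplified sketch of the fast gup pte walk */
	if (pte_special(pte))
		return 0;	/* abort fast path; slow path honours VM_PFNMAP/VM_IO */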

Signed-off-by: Daniel Vetter 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/Kconfig| 2 +-
 drivers/gpu/drm/drm_gem_shmem_helper.c | 4 ++--
 drivers/gpu/drm/gud/Kconfig| 2 +-
 drivers/gpu/drm/tiny/Kconfig   | 4 ++--
 drivers/gpu/drm/udl/Kconfig| 1 +
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 0d372354c2d0..314eefa39892 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -211,7 +211,7 @@ config DRM_KMS_CMA_HELPER
 
 config DRM_GEM_SHMEM_HELPER
bool
-   depends on DRM
+   depends on DRM && MMU
help
  Choose this if you need the GEM shmem helper functions
 
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index d5e6d4568f99..296ab1b7c07f 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -542,7 +542,7 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
} else {
page = shmem->pages[page_offset];
 
-   ret = vmf_insert_page(vma, vmf->address, page);
+   ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
}
 
	mutex_unlock(&shmem->pages_lock);
@@ -612,7 +612,7 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
return ret;
}
 
-   vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
+   vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
if (shmem->map_wc)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
diff --git a/drivers/gpu/drm/gud/Kconfig b/drivers/gpu/drm/gud/Kconfig
index 1c8601bf4d91..9c1e61f9eec3 100644
--- a/drivers/gpu/drm/gud/Kconfig
+++ b/drivers/gpu/drm/gud/Kconfig
@@ -2,7 +2,7 @@
 
 config DRM_GUD
tristate "GUD USB Display"
-   depends on DRM && USB
+   depends on DRM && USB && MMU
select LZ4_COMPRESS
select DRM_KMS_HELPER
select DRM_GEM_SHMEM_HELPER
diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
index 5593128eeff9..c11fb5be7d09 100644
--- a/drivers/gpu/drm/tiny/Kconfig
+++ b/drivers/gpu/drm/tiny/Kconfig
@@ -44,7 +44,7 @@ config DRM_CIRRUS_QEMU
 
 config DRM_GM12U320
tristate "GM12U320 driver for USB projectors"
-   depends on DRM && USB
+   depends on DRM && USB && MMU
select DRM_KMS_HELPER
select DRM_GEM_SHMEM_HELPER
help
@@ -53,7 +53,7 @@ config DRM_GM12U320
 
 config DRM_SIMPLEDRM
tristate "Simple framebuffer driver"
-   depends on DRM
+   depends on DRM && MMU
select DRM_GEM_SHMEM_HELPER
select DRM_KMS_HELPER
help
diff --git a/drivers/gpu/drm/udl/Kconfig b/drivers/gpu/drm/udl/Kconfig
index 1f497d8f1ae5..c744175c6992 100644
--- a/drivers/gpu/drm/udl/Kconfig
+++ b/drivers/gpu/drm/udl/Kconfig
@@ -4,6 +4,7 @@ config DRM_UDL
depends on DRM
depends on USB
depends on USB_ARCH_HAS_HCD
+   depends on MMU
select DRM_GEM_SHMEM_HELPER
select DRM_KMS_HELPER
help
-- 
2.32.0



[PATCH v4 0/4] shmem helpers for vgem

2021-07-13 Thread Daniel Vetter
Hi all

I've found another potential issue, so let's try this again and see what
intel-gfx-ci says. Also Thomas tried to unify vgem more, which motivated
me to dig this all out again.

Test-with: 20210527140732.5762-1-daniel.vet...@ffwll.ch

Review very much welcome, as always!

Cheers, Daniel

Daniel Vetter (4):
  dma-buf: Require VM_PFNMAP vma for mmap
  drm/shmem-helper: Switch to vmf_insert_pfn
  drm/shmem-helpers: Allocate wc pages on x86
  drm/vgem: use shmem helpers

 drivers/dma-buf/dma-buf.c  |  15 +-
 drivers/gpu/drm/Kconfig|   7 +-
 drivers/gpu/drm/drm_gem_shmem_helper.c |  18 +-
 drivers/gpu/drm/gud/Kconfig|   2 +-
 drivers/gpu/drm/tiny/Kconfig   |   4 +-
 drivers/gpu/drm/udl/Kconfig|   1 +
 drivers/gpu/drm/vgem/vgem_drv.c| 315 +
 7 files changed, 49 insertions(+), 313 deletions(-)

-- 
2.32.0



Re: [Intel-gfx] [PATCH] drm/i915/gt: Fix -EDEADLK handling regression

2021-07-13 Thread Ville Syrjälä
On Tue, Jul 13, 2021 at 09:59:18PM +0200, Daniel Vetter wrote:
> On Tue, Jul 13, 2021 at 9:58 PM Daniel Vetter  wrote:
> >
> > On Thu, Jul 1, 2021 at 9:07 AM Maarten Lankhorst
> >  wrote:
> > > Op 30-06-2021 om 18:44 schreef Ville Syrjala:
> > > > From: Ville Syrjälä 
> > > >
> > > > The conversion to ww mutexes failed to address the fence code which
> > > > already returns -EDEADLK when we run out of fences. Ww mutexes on
> > > > the other hand treat -EDEADLK as an internal errno value indicating
> > > > a need to restart the operation due to a deadlock. So now when the
> > > > fence code returns -EDEADLK the higher level code erroneously
> > > > restarts everything instead of returning the error to userspace
> > > > as is expected.
> > > >
> > > > To remedy this let's switch the fence code to use a different errno
> > > > value for this. -ENOBUFS seems like a semi-reasonable unique choice.
> > > > Apart from igt the only user of this I could find is sna, and even
> > > > there all we do is dump the current fence registers from debugfs
> > > > into the X server log. So no user visible functionality is affected.
> > > > If we really cared about preserving this we could of course convert
> > > > back to -EDEADLK higher up, but doesn't seem like that's worth
> > > > the hassle here.
> > > >
> > > > Not quite sure which commit specifically broke this, but I'll
> > > > just attribute it to the general gem ww mutex work.
> > > >
> > > > Cc: sta...@vger.kernel.org
> > > > Cc: Maarten Lankhorst 
> > > > Cc: Thomas Hellström 
> > > > Testcase: igt/gem_pread/exhaustion
> > > > Testcase: igt/gem_pwrite/basic-exhaustion
> > > > Testcase: igt/gem_fenced_exec_thrash/too-many-fences
> > > > Fixes: 80f0b679d6f0 ("drm/i915: Add an implementation for 
> > > > i915_gem_ww_ctx locking, v2.")
> > > > Signed-off-by: Ville Syrjälä 
> > > > ---
> > > >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c 
> > > > b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > > index cac7f3f44642..f8948de72036 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > > @@ -348,7 +348,7 @@ static struct i915_fence_reg *fence_find(struct 
> > > > i915_ggtt *ggtt)
> > > >   if (intel_has_pending_fb_unpin(ggtt->vm.i915))
> > > >   return ERR_PTR(-EAGAIN);
> > > >
> > > > - return ERR_PTR(-EDEADLK);
> > > > + return ERR_PTR(-ENOBUFS);
> > > >  }
> > > >
> > > >  int __i915_vma_pin_fence(struct i915_vma *vma)
> > >
> > > Makes sense..
> > >
> > > Reviewed-by: Maarten Lankhorst 
> > >
> > > Is it a slightly more recent commit? Might probably be the part that 
> > > converts execbuffer to use ww locks.
> >
> > - please cc: dri-devel on anything gem/gt related.

Thought I did. Apparently got lost somewhere.

> > - this should probably be ENOSPC or something like that for at least a
> > seeming retention of errno consistency:

ENOSPC is already used for other things.

> >
> > https://dri.freedesktop.org/docs/drm/gpu/drm-uapi.html#recommended-ioctl-return-values
> 
> Other option would be to map that back to EDEADLK in the execbuf ioctl
> somewhere, so we retain a distinct errno code.

Already mentioned in the commit msg.

-- 
Ville Syrjälä
Intel


Re: [PATCH] dim/drm-misc: Add rule to not push patches with issues

2021-07-13 Thread Daniel Vetter
On Fri, Jul 9, 2021 at 10:11 AM Daniel Vetter  wrote:
>
> We kinda left this out, and I like the wording from the drm-intel
> side, so add that. Motivated by a discussion with Christian.
>
> Cc: Christian König 
> Cc: Maarten Lankhorst 
> Cc: Maxime Ripard 
> Cc: Thomas Zimmermann 
> Signed-off-by: Daniel Vetter 

Pushed, thanks for all the r-b/acks.
-Daniel

> ---
>  committer-drm-misc.rst | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/committer-drm-misc.rst b/committer-drm-misc.rst
> index 9497a5d26a9d..110ca8b0525e 100644
> --- a/committer-drm-misc.rst
> +++ b/committer-drm-misc.rst
> @@ -21,6 +21,9 @@ Merge Criteria
>
>  Right now the only hard merge criteria are:
>
> +* There must not be open issues or unresolved or conflicting feedback from
> +  anyone. Clear them up first. Defer to maintainers as needed.
> +
>  * Patch is properly reviewed or at least Ack, i.e. don't just push your own
>stuff directly. This rule holds even more for bugfix patches - it would be
>embarrassing if the bugfix contains a small gotcha that review would have
> --
> 2.32.0
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH] drm/fb-helper: Try to protect cleanup against delayed setup

2021-07-13 Thread Sam Ravnborg
Hi Daniel,

On Tue, Jul 13, 2021 at 03:59:22PM +0200, Daniel Vetter wrote:
> Some vague evidence suggests this can go wrong. Try to prevent it by
> holding the right mutex and clearing ->deferred_setup to make sure we
> later on don't accidentally try to re-register the fbdev when the
> driver thought it had it all cleaned up already.
> 
> v2: I realized that this is fundamentally butchered, and CI complained
> about lockdep splats. So limit the critical section again and just add
> a few notes on what the proper fix is.
> 
> References: 
> https://intel-gfx-ci.01.org/tree/linux-next/next-20201215/fi-byt-j1900/igt@i915_pm_...@module-reload.html
> Signed-off-by: Daniel Vetter 
> Cc: Ville Syrjälä 
> Cc: Chris Wilson 
> Cc: Maarten Lankhorst 
> Cc: Maxime Ripard 
> Cc: Thomas Zimmermann 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> ---
>  drivers/gpu/drm/drm_fb_helper.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
> index 9d82fda274eb..8f11e5abb222 100644
> --- a/drivers/gpu/drm/drm_fb_helper.c
> +++ b/drivers/gpu/drm/drm_fb_helper.c
> @@ -598,6 +598,9 @@ EXPORT_SYMBOL(drm_fb_helper_alloc_fbi);
>   * A wrapper around unregister_framebuffer, to release the fb_info
>   * framebuffer device. This must be called before releasing all resources for
>   * @fb_helper by calling drm_fb_helper_fini().
> + *
> + * Note that this is fundamentally racy on hotunload because it doen't handle
s/doen't/doesn't/
> + * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead.
>   */
>  void drm_fb_helper_unregister_fbi(struct drm_fb_helper *fb_helper)
>  {
> @@ -611,6 +614,9 @@ EXPORT_SYMBOL(drm_fb_helper_unregister_fbi);
>   * @fb_helper: driver-allocated fbdev helper, can be NULL
>   *
>   * This cleans up all remaining resources associated with @fb_helper.
> + *
> + * Note that this is fundamentally racy on hotunload because it doen't handle
s/doen't/doesn't/
> + * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead.
>   */
>  void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
>  {
> @@ -2382,6 +2388,10 @@ static void drm_fbdev_client_unregister(struct 
> drm_client_dev *client)
>  {
>   struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client);
>  
> +	mutex_lock(&fb_helper->lock);
> + fb_helper->deferred_setup = false;
> +	mutex_unlock(&fb_helper->lock);
> +
>   if (fb_helper->fbdev)
>   /* drm_fbdev_fb_destroy() takes care of cleanup */
>   drm_fb_helper_unregister_fbi(fb_helper);

I could not find any better spot to clear deferred_setup - so I think
this is OK.

With the two spelling issues fixed:
Acked-by: Sam Ravnborg 

No r-b as I am not too fluent in these code paths and all the locking.

Sam


Re: [Intel-gfx] [PATCH] drm/i915/gt: Fix -EDEADLK handling regression

2021-07-13 Thread Rodrigo Vivi
On Tue, Jul 13, 2021 at 09:59:18PM +0200, Daniel Vetter wrote:
> On Tue, Jul 13, 2021 at 9:58 PM Daniel Vetter  wrote:
> >
> > On Thu, Jul 1, 2021 at 9:07 AM Maarten Lankhorst
> >  wrote:
> > > Op 30-06-2021 om 18:44 schreef Ville Syrjala:
> > > > From: Ville Syrjälä 
> > > >
> > > > The conversion to ww mutexes failed to address the fence code which
> > > > already returns -EDEADLK when we run out of fences. Ww mutexes on
> > > > the other hand treat -EDEADLK as an internal errno value indicating
> > > > a need to restart the operation due to a deadlock. So now when the
> > > > fence code returns -EDEADLK the higher level code erroneously
> > > > restarts everything instead of returning the error to userspace
> > > > as is expected.
> > > >
> > > > To remedy this let's switch the fence code to use a different errno
> > > > value for this. -ENOBUFS seems like a semi-reasonable unique choice.
> > > > Apart from igt the only user of this I could find is sna, and even
> > > > there all we do is dump the current fence registers from debugfs
> > > > into the X server log. So no user visible functionality is affected.
> > > > If we really cared about preserving this we could of course convert
> > > > back to -EDEADLK higher up, but doesn't seem like that's worth
> > > > the hassle here.
> > > >
> > > > Not quite sure which commit specifically broke this, but I'll
> > > > just attribute it to the general gem ww mutex work.
> > > >
> > > > Cc: sta...@vger.kernel.org
> > > > Cc: Maarten Lankhorst 
> > > > Cc: Thomas Hellström 
> > > > Testcase: igt/gem_pread/exhaustion
> > > > Testcase: igt/gem_pwrite/basic-exhaustion
> > > > Testcase: igt/gem_fenced_exec_thrash/too-many-fences
> > > > Fixes: 80f0b679d6f0 ("drm/i915: Add an implementation for 
> > > > i915_gem_ww_ctx locking, v2.")
> > > > Signed-off-by: Ville Syrjälä 
> > > > ---
> > > >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c 
> > > > b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > > index cac7f3f44642..f8948de72036 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > > @@ -348,7 +348,7 @@ static struct i915_fence_reg *fence_find(struct 
> > > > i915_ggtt *ggtt)
> > > >   if (intel_has_pending_fb_unpin(ggtt->vm.i915))
> > > >   return ERR_PTR(-EAGAIN);
> > > >
> > > > - return ERR_PTR(-EDEADLK);
> > > > + return ERR_PTR(-ENOBUFS);
> > > >  }
> > > >
> > > >  int __i915_vma_pin_fence(struct i915_vma *vma)
> > >
> > > Makes sense..
> > >
> > > Reviewed-by: Maarten Lankhorst 
> > >
> > > Is it a slightly more recent commit? Might probably be the part that 
> > > converts execbuffer to use ww locks.
> >
> > - please cc: dri-devel on anything gem/gt related.
> > - this should probably be ENOSPC or something like that for at least a
> > seeming retention of errno consistency:
> >
> > https://dri.freedesktop.org/docs/drm/gpu/drm-uapi.html#recommended-ioctl-return-values
> 
> Other option would be to map that back to EDEADLK in the execbuf ioctl
> somewhere, so we retain a distinct errno code.

I'm about to push this patch to drm-intel-fixes... I'm assuming if there's any
fix it will be a follow-up patch and not a revert or force push, right?!

> -Daniel
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
> ___
> Intel-gfx mailing list
> intel-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [PATCH v1 1/1] drm: bridge: Mark mode_fixup deprecated

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 9:33 PM Sam Ravnborg  wrote:
>
> Make it obvious that mode_fixup is deprecated and new drivers shall use
> atomic_check.
>
> Signed-off-by: Sam Ravnborg 
> Cc: Laurent Pinchart 
> Cc: Andrzej Hajda 
> Cc: Maarten Lankhorst 
> Cc: Maxime Ripard 
> Cc: Thomas Zimmermann 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> ---
>  include/drm/drm_bridge.h | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
> index 46bdfa48c413..668f14234459 100644
> --- a/include/drm/drm_bridge.h
> +++ b/include/drm/drm_bridge.h
> @@ -136,6 +136,9 @@ struct drm_bridge_funcs {
>  *
>  * NOTE:
>  *
> +* This is deprecated, do not use!
> +* New drivers shall use &drm_bridge_funcs.atomic_check.

Bit of a bikeshed, but in the rendered text this will be one line, I'd
just merge it. Also since this is a reference within the same struct
just use @atomic_check.

Now the real review: There's a pile more of these in
drm_modeset_helper_vtables.h (well in crtc and encoder funcs, so only
two, a small pile). Can you pls fix those up too? With that:

Reviewed-by: Daniel Vetter 

> +*
>  * This function is called in the check phase of atomic modesets, 
> which
>  * can be aborted for any reason (including on userspace's request to
>  * just check whether a configuration would be possible). Drivers MUST
> --
> 2.30.2
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH] drm/i915/gt: Fix -EDEADLK handling regression

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 9:58 PM Daniel Vetter  wrote:
>
> On Thu, Jul 1, 2021 at 9:07 AM Maarten Lankhorst
>  wrote:
> > Op 30-06-2021 om 18:44 schreef Ville Syrjala:
> > > From: Ville Syrjälä 
> > >
> > > The conversion to ww mutexes failed to address the fence code which
> > > already returns -EDEADLK when we run out of fences. Ww mutexes on
> > > the other hand treat -EDEADLK as an internal errno value indicating
> > > a need to restart the operation due to a deadlock. So now when the
> > > fence code returns -EDEADLK the higher level code erroneously
> > > restarts everything instead of returning the error to userspace
> > > as is expected.
> > >
> > > To remedy this let's switch the fence code to use a different errno
> > > value for this. -ENOBUFS seems like a semi-reasonable unique choice.
> > > Apart from igt the only user of this I could find is sna, and even
> > > there all we do is dump the current fence registers from debugfs
> > > into the X server log. So no user visible functionality is affected.
> > > If we really cared about preserving this we could of course convert
> > > back to -EDEADLK higher up, but doesn't seem like that's worth
> > > the hassle here.
> > >
> > > Not quite sure which commit specifically broke this, but I'll
> > > just attribute it to the general gem ww mutex work.
> > >
> > > Cc: sta...@vger.kernel.org
> > > Cc: Maarten Lankhorst 
> > > Cc: Thomas Hellström 
> > > Testcase: igt/gem_pread/exhaustion
> > > Testcase: igt/gem_pwrite/basic-exhaustion
> > > Testcase: igt/gem_fenced_exec_thrash/too-many-fences
> > > Fixes: 80f0b679d6f0 ("drm/i915: Add an implementation for i915_gem_ww_ctx 
> > > locking, v2.")
> > > Signed-off-by: Ville Syrjälä 
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c 
> > > b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > index cac7f3f44642..f8948de72036 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > > @@ -348,7 +348,7 @@ static struct i915_fence_reg *fence_find(struct 
> > > i915_ggtt *ggtt)
> > >   if (intel_has_pending_fb_unpin(ggtt->vm.i915))
> > >   return ERR_PTR(-EAGAIN);
> > >
> > > - return ERR_PTR(-EDEADLK);
> > > + return ERR_PTR(-ENOBUFS);
> > >  }
> > >
> > >  int __i915_vma_pin_fence(struct i915_vma *vma)
> >
> > Makes sense..
> >
> > Reviewed-by: Maarten Lankhorst 
> >
> > Is it a slightly more recent commit? Might probably be the part that 
> > converts execbuffer to use ww locks.
>
> - please cc: dri-devel on anything gem/gt related.
> - this should probably be ENOSPC or something like that for at least a
> seeming retention of errno consistency:
>
> https://dri.freedesktop.org/docs/drm/gpu/drm-uapi.html#recommended-ioctl-return-values

Other option would be to map that back to EDEADLK in the execbuf ioctl
somewhere, so we retain a distinct errno code.
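
Roughly (sketch only, the exact call site is made up):

	err = i915_vma_pin_fence(vma);
	if (err == -ENOBUFS)
		err = -EDEADLK;	/* keep the historical uapi errno */
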
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH] drm/i915/gt: Fix -EDEADLK handling regression

2021-07-13 Thread Daniel Vetter
On Thu, Jul 1, 2021 at 9:07 AM Maarten Lankhorst
 wrote:
> Op 30-06-2021 om 18:44 schreef Ville Syrjala:
> > From: Ville Syrjälä 
> >
> > The conversion to ww mutexes failed to address the fence code which
> > already returns -EDEADLK when we run out of fences. Ww mutexes on
> > the other hand treat -EDEADLK as an internal errno value indicating
> > a need to restart the operation due to a deadlock. So now when the
> > fence code returns -EDEADLK the higher level code erroneously
> > restarts everything instead of returning the error to userspace
> > as is expected.
> >
> > To remedy this let's switch the fence code to use a different errno
> > value for this. -ENOBUFS seems like a semi-reasonable unique choice.
> > Apart from igt the only user of this I could find is sna, and even
> > there all we do is dump the current fence registers from debugfs
> > into the X server log. So no user visible functionality is affected.
> > If we really cared about preserving this we could of course convert
> > back to -EDEADLK higher up, but doesn't seem like that's worth
> > the hassle here.
> >
> > Not quite sure which commit specifically broke this, but I'll
> > just attribute it to the general gem ww mutex work.
> >
> > Cc: sta...@vger.kernel.org
> > Cc: Maarten Lankhorst 
> > Cc: Thomas Hellström 
> > Testcase: igt/gem_pread/exhaustion
> > Testcase: igt/gem_pwrite/basic-exhaustion
> > Testcase: igt/gem_fenced_exec_thrash/too-many-fences
> > Fixes: 80f0b679d6f0 ("drm/i915: Add an implementation for i915_gem_ww_ctx 
> > locking, v2.")
> > Signed-off-by: Ville Syrjälä 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c 
> > b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > index cac7f3f44642..f8948de72036 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
> > @@ -348,7 +348,7 @@ static struct i915_fence_reg *fence_find(struct 
> > i915_ggtt *ggtt)
> >   if (intel_has_pending_fb_unpin(ggtt->vm.i915))
> >   return ERR_PTR(-EAGAIN);
> >
> > - return ERR_PTR(-EDEADLK);
> > + return ERR_PTR(-ENOBUFS);
> >  }
> >
> >  int __i915_vma_pin_fence(struct i915_vma *vma)
>
> Makes sense..
>
> Reviewed-by: Maarten Lankhorst 
>
> Is it a slightly more recent commit? Might probably be the part that converts 
> execbuffer to use ww locks.

- please cc: dri-devel on anything gem/gt related.
- this should probably be ENOSPC or something like that for at least a
seeming retention of errno consistency:

https://dri.freedesktop.org/docs/drm/gpu/drm-uapi.html#recommended-ioctl-return-values

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH] video: fbdev: kyrofb: fix a DoS bug by restricting user input

2021-07-13 Thread Sam Ravnborg
Hi 
On Tue, Jul 13, 2021 at 12:51:14PM +, Zheyu Ma wrote:
> The user can pass in any value to the driver through the 'ioctl'
> interface. The driver does not check, which may cause DoS bugs.
> 
> Fix this by checking if the divisor is 0.

This fix addresses the problem at too low a layer.
Could you please look at validating the inputs at a much higher
level, in kyro_dev_overlay_viewport_set()?
Then the checks are much more obvious, and you are more likely to catch
a number of similar bugs that this particular test just doesn't trigger.
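
Something along these lines is what I have in mind (untested sketch,
parameter names guessed):

	/* in kyro_dev_overlay_viewport_set(), before calling SetOverlayViewPort() */
	if (!ulWidth || !ulHeight)
		return -EINVAL;	/* reject degenerate viewports up front */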

Sam

> 
> The following log reveals it:
> 
> divide error:  [#1] PREEMPT SMP KASAN PTI
> RIP: 0010:SetOverlayViewPort+0x133/0x5f0 
> drivers/video/fbdev/kyro/STG4000OverlayDevice.c:476
> Call Trace:
>  kyro_dev_overlay_viewport_set drivers/video/fbdev/kyro/fbdev.c:378 [inline]
>  kyrofb_ioctl+0x2eb/0x330 drivers/video/fbdev/kyro/fbdev.c:603
>  do_fb_ioctl+0x1f3/0x700 drivers/video/fbdev/core/fbmem.c:1171
>  fb_ioctl+0xeb/0x130 drivers/video/fbdev/core/fbmem.c:1185
>  vfs_ioctl fs/ioctl.c:48 [inline]
>  __do_sys_ioctl fs/ioctl.c:753 [inline]
>  __se_sys_ioctl fs/ioctl.c:739 [inline]
>  __x64_sys_ioctl+0x19b/0x220 fs/ioctl.c:739
>  do_syscall_64+0x32/0x80 arch/x86/entry/common.c:46
>  entry_SYSCALL_64_after_hwframe+0x44/0xae
> 
> Signed-off-by: Zheyu Ma 
> ---
>  drivers/video/fbdev/kyro/STG4000OverlayDevice.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/video/fbdev/kyro/STG4000OverlayDevice.c 
> b/drivers/video/fbdev/kyro/STG4000OverlayDevice.c
> index 9fde0e3b69ec..29d692fe5e75 100644
> --- a/drivers/video/fbdev/kyro/STG4000OverlayDevice.c
> +++ b/drivers/video/fbdev/kyro/STG4000OverlayDevice.c
> @@ -407,6 +407,9 @@ int SetOverlayViewPort(volatile STG4000REG __iomem 
> *pSTGReg,
>   ulVertDecFactor = 1;
>   }
>  
> + if ((ulDest + 1) == 0)
> + return -EINVAL;
> +
>   ulDacYScale = ((ulSrc - 1) * 2048) / (ulDest + 1);
>  
>   tmp = STG_READ_REG(DACOverlayVtDec);/* Decimation */
> @@ -471,6 +474,9 @@ int SetOverlayViewPort(volatile STG4000REG __iomem 
> *pSTGReg,
>*/
>   ulScaleLeft = ulSrcLeft;
>  
> + if ((ulRight - ulLeft + 2) == 0)
> + return -EINVAL;
> +
>   /* shift fxscale until it is in the range of the scaler */
>   ulhDecim = 0;
>   ulScale = (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / 
> (ulRight - ulLeft + 2);
> -- 
> 2.17.6


Re: [RFC] drm: return int error code from mode_fixup

2021-07-13 Thread Sam Ravnborg
On Tue, Jul 13, 2021 at 07:44:12PM +0200, Daniel Vetter wrote:
> On Tue, Jul 13, 2021 at 7:14 PM Grace An  wrote:
> > When CONFIG_PROVE_LOCKING is defined, the kernel randomly injects
> > -EDEADLK errors for all the ww_mutex. This results in
> > drm_atomic_get_private_obj_state randomly returning -EDEADLK.
> > However, the mode_fixup functions do not propagate these error
> > codes and return false, causing the atomic commit to fail with
> > -EINVAL instead of retrying.
> >
> > Change encoder, crtc, and bridge mode_fixup functions to return
> > an int instead of a boolean to indicate success or failure. If
> > any of these functions fail, the mode_fixup function now returns
> > the provided integer error code instead of -EINVAL.
> >
> > This change needs modifications across drivers, but before submitting
> > the entire change, we want to get feedback on this RFC.
> >
> > Signed-off-by: Grace An 
> 
> Why don't you just use the various atomic_check hooks we have for
> this? There you get passed the state and everything, have a full int
> return value, and things actually work.
> 
> ->mode_fixup is for compatibility with legacy crtc modeset helpers
> from the pre-atomic times. If the kerneldoc isn't clear yet, please do
> a patch to fix that up so that @mode_fixup points at the relevant
> @atomic_check as the recommended function.
Agreed, and we need to document this better.

I have posted the following patch to make it more obvious that
mode_fixup is deprecated.
https://lore.kernel.org/dri-devel/20210713193257.958852-1-...@ravnborg.org/T/#u

Sam


[PATCH v1 1/1] drm: bridge: Mark mode_fixup deprecated

2021-07-13 Thread Sam Ravnborg
Make it obvious that mode_fixup is deprecated and new drivers shall use
atomic_check.

Signed-off-by: Sam Ravnborg 
Cc: Laurent Pinchart 
Cc: Andrzej Hajda 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
---
 include/drm/drm_bridge.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index 46bdfa48c413..668f14234459 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -136,6 +136,9 @@ struct drm_bridge_funcs {
 *
 * NOTE:
 *
+* This is deprecated, do not use!
+* New drivers shall use &drm_bridge_funcs.atomic_check.
+*
 * This function is called in the check phase of atomic modesets, which
 * can be aborted for any reason (including on userspace's request to
 * just check whether a configuration would be possible). Drivers MUST
-- 
2.30.2



[PATCH V2] video: backlight: Drop maximum brightness override for brightness zero

2021-07-13 Thread Marek Vasut
The note in c2adda27d202f ("video: backlight: Add of_find_backlight helper
in backlight.c") says that gpio-backlight uses brightness as power state.
This has been fixed since in ec665b756e6f7 ("backlight: gpio-backlight:
Correct initial power state handling") and other backlight drivers do not
require this workaround. Drop the workaround.

This fixes the case where e.g. pwm-backlight can perfectly well be set to
brightness 0 on boot in DT, which without this patch leads to the display
brightness to be max instead of off.

Fixes: c2adda27d202f ("video: backlight: Add of_find_backlight helper in backlight.c")
Acked-by: Noralf Trønnes 
Reviewed-by: Daniel Thompson 
Cc:  # 5.4+
Cc:  # 4.19.x: ec665b756e6f7: backlight: gpio-backlight: Correct initial power state handling
Signed-off-by: Marek Vasut 
Cc: Daniel Thompson 
Cc: Meghana Madhyastha 
Cc: Noralf Trønnes 
Cc: Sean Paul 
Cc: Thierry Reding 
---
V2: Add AB/RB, CC stable
---
 drivers/video/backlight/backlight.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 537fe1b376ad7..fc990e576340b 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -688,12 +688,6 @@ static struct backlight_device *of_find_backlight(struct device *dev)
of_node_put(np);
if (!bd)
return ERR_PTR(-EPROBE_DEFER);
-   /*
-* Note: gpio_backlight uses brightness as
-* power state during probe
-*/
-   if (!bd->props.brightness)
-   bd->props.brightness = bd->props.max_brightness;
}
}
 
-- 
2.30.2



Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-13 Thread Ville Syrjälä
On Tue, Jul 13, 2021 at 07:24:23PM +0100, Matthew Auld wrote:
> On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
>  wrote:
> >
> > On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> > >  wrote:
> > > >
> > > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > > + /**
> > > > > +  * @cache_coherent:
> > > > > +  *
> > > > > +  * Track whether the pages are coherent with the GPU if reading 
> > > > > or
> > > > > +  * writing through the CPU cache.
> > > > > +  *
> > > > > +  * This largely depends on the @cache_level, for example if the 
> > > > > object
> > > > > +  * is marked as I915_CACHE_LLC, then GPU access is coherent for 
> > > > > both
> > > > > +  * reads and writes through the CPU cache.
> > > > > +  *
> > > > > +  * Note that on platforms with shared-LLC support(HAS_LLC) 
> > > > > reads through
> > > > > +  * the CPU cache are always coherent, regardless of the 
> > > > > @cache_level. On
> > > > > +  * snooping based platforms this is not the case, unless the 
> > > > > full
> > > > > +  * I915_CACHE_LLC or similar setting is used.
> > > > > +  *
> > > > > +  * As a result of this we need to track coherency separately 
> > > > > for reads
> > > > > +  * and writes, in order to avoid superfluous flushing on 
> > > > > shared-LLC
> > > > > +  * platforms, for reads.
> > > > > +  *
> > > > > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > > +  *
> > > > > +  * When reading through the CPU cache, the GPU is still 
> > > > > coherent. Note
> > > > > +  * that no data has actually been modified here, so it might 
> > > > > seem
> > > > > +  * strange that we care about this.
> > > > > +  *
> > > > > +  * As an example, if some object is mapped on the CPU with 
> > > > > write-back
> > > > > +  * caching, and we read some page, then the cache likely now 
> > > > > contains
> > > > > +  * the data from that read. At this point the cache and main 
> > > > > memory
> > > > > +  * match up, so all good. But next the GPU needs to write some 
> > > > > data to
> > > > > +  * that same page. Now if the @cache_level is I915_CACHE_NONE 
> > > > > and the
> > > > > +  * the platform doesn't have the shared-LLC, then the GPU will
> > > > > +  * effectively skip invalidating the cache(or however that works
> > > > > +  * internally) when writing the new value.  This is really bad 
> > > > > since the
> > > > > +  * GPU has just written some new data to main memory, but the 
> > > > > CPU cache
> > > > > +  * is still valid and now contains stale data. As a result the 
> > > > > next time
> > > > > +  * we do a cached read with the CPU, we are rewarded with stale 
> > > > > data.
> > > > > +  * Likewise if the cache is later flushed, we might be rewarded 
> > > > > with
> > > > > +  * overwriting main memory with stale data.
> > > > > +  *
> > > > > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > > +  *
> > > > > +  * When writing through the CPU cache, the GPU is still 
> > > > > coherent. Note
> > > > > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > > +  *
> > > > > +  * This is never set when I915_CACHE_NONE is used for 
> > > > > @cache_level,
> > > > > +  * where instead we have to manually flush the caches after 
> > > > > writing
> > > > > +  * through the CPU cache. For other cache levels this should be 
> > > > > set and
> > > > > +  * the object is therefore considered coherent for both reads 
> > > > > and writes
> > > > > +  * through the CPU cache.
> > > >
> > > > I don't remember why we have this read vs. write split and this new
> > > > documentation doesn't seem to really explain it either.
> > >
> > > Hmm, I attempted to explain that earlier:
> > >
> > > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > > * the CPU cache are always coherent, regardless of the @cache_level. On
> > > * snooping based platforms this is not the case, unless the full
> > > * I915_CACHE_LLC or similar setting is used.
> > > *
> > > * As a result of this we need to track coherency separately for reads
> > > * and writes, in order to avoid superfluous flushing on shared-LLC
> > > * platforms, for reads.
> > >
> > > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > > also not being coherent for writes(CACHE_NONE),
> >
> > CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> > never heard of any mechanism that would make it only partially coherent.
> 
> What do you mean by "comes to LLC", are you talking about HAS_LLC() or
> I915_CACHE_LLC?

I'm talking about the actual cache.

> 
> If you set I915_CACHE_LLC, then yes it is fully coherent for both
> HAS_LLC() and HAS_SNOOP().
> 
> If you set I915_CACHE_NONE, then reads are still coherent on
> HAS_LLC(),

Reads and 

Re: [PATCH 21/47] drm/i915/guc: Ensure G2H response has space in buffer

2021-07-13 Thread John Harrison

On 6/24/2021 00:04, Matthew Brost wrote:

Ensure G2H response has space in the buffer before sending H2G CTB as
the GuC can't handle any backpressure on the G2H interface.

Signed-off-by: John Harrison 
Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc.h| 13 +++-
  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 76 +++
  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  4 +-
  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  4 +
  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 ++--
  5 files changed, 87 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index b43ec56986b5..24e7a924134e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -95,11 +95,17 @@ inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
  }
  
  #define INTEL_GUC_SEND_NB		BIT(31)

+#define INTEL_GUC_SEND_G2H_DW_SHIFT	0
+#define INTEL_GUC_SEND_G2H_DW_MASK (0xff << INTEL_GUC_SEND_G2H_DW_SHIFT)
+#define MAKE_SEND_FLAGS(len) \
+   ({GEM_BUG_ON(!FIELD_FIT(INTEL_GUC_SEND_G2H_DW_MASK, len)); \
+   (FIELD_PREP(INTEL_GUC_SEND_G2H_DW_MASK, len) | INTEL_GUC_SEND_NB);})
  static
-inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len)
+inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len,
+u32 g2h_len_dw)
  {
	return intel_guc_ct_send(&guc->ct, action, len, NULL, 0,
-INTEL_GUC_SEND_NB);
+MAKE_SEND_FLAGS(g2h_len_dw));
  }
  
  static inline int

@@ -113,6 +119,7 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
  static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
   const u32 *action,
   u32 len,
+  u32 g2h_len_dw,
   bool loop)
  {
int err;
@@ -121,7 +128,7 @@ static inline int intel_guc_send_busy_loop(struct intel_guc* guc,
might_sleep_if(loop && (!in_atomic() && !irqs_disabled()));
  
  retry:

-   err = intel_guc_send_nb(guc, action, len);
+   err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
if (unlikely(err == -EBUSY && loop)) {
if (likely(!in_atomic() && !irqs_disabled()))
cond_resched();
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 7491f041859e..a60970e85635 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -73,6 +73,7 @@ static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct)
  #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
  #define CTB_H2G_BUFFER_SIZE   (SZ_4K)
  #define CTB_G2H_BUFFER_SIZE   (4 * CTB_H2G_BUFFER_SIZE)
+#define G2H_ROOM_BUFFER_SIZE   (PAGE_SIZE)
Any particular reason why PAGE_SIZE instead of SZ_4K? I'm not seeing 
anything in the code that is actually related to page sizes. Seems like 
'(CTB_G2H_BUFFER_SIZE / 4)' would be a more correct way to express it. 
Unless I'm missing something about how it's used?


John.


  
  struct ct_request {

struct list_head link;
@@ -129,23 +130,27 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc)
  
  static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)

  {
+   u32 space;
+
ctb->broken = false;
ctb->tail = 0;
ctb->head = 0;
-   ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
+   space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space;
+	atomic_set(&ctb->space, space);
  
  	guc_ct_buffer_desc_init(ctb->desc);

  }
  
  static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb,

   struct guc_ct_buffer_desc *desc,
-  u32 *cmds, u32 size_in_bytes)
+  u32 *cmds, u32 size_in_bytes, u32 resv_space)
  {
GEM_BUG_ON(size_in_bytes % 4);
  
  	ctb->desc = desc;

ctb->cmds = cmds;
ctb->size = size_in_bytes / 4;
+   ctb->resv_space = resv_space / 4;
  
  	guc_ct_buffer_reset(ctb);

  }
@@ -226,6 +231,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
struct guc_ct_buffer_desc *desc;
u32 blob_size;
u32 cmds_size;
+   u32 resv_space;
void *blob;
u32 *cmds;
int err;
@@ -250,19 +256,23 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
desc = blob;
cmds = blob + 2 * CTB_DESC_SIZE;
cmds_size = CTB_H2G_BUFFER_SIZE;
-   CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "send",
-ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size);
+   resv_space = 0;
+   CT_DEBUG(ct, "%s desc %#tx cmds %#tx size 

Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-13 Thread Matthew Auld
On Tue, 13 Jul 2021 at 18:47, Ville Syrjälä
 wrote:
>
> On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> > On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
> >  wrote:
> > >
> > > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > > + /**
> > > > +  * @cache_coherent:
> > > > +  *
> > > > +  * Track whether the pages are coherent with the GPU if reading or
> > > > +  * writing through the CPU cache.
> > > > +  *
> > > > +  * This largely depends on the @cache_level, for example if the 
> > > > object
> > > > +  * is marked as I915_CACHE_LLC, then GPU access is coherent for 
> > > > both
> > > > +  * reads and writes through the CPU cache.
> > > > +  *
> > > > +  * Note that on platforms with shared-LLC support(HAS_LLC) reads 
> > > > through
> > > > +  * the CPU cache are always coherent, regardless of the 
> > > > @cache_level. On
> > > > +  * snooping based platforms this is not the case, unless the full
> > > > +  * I915_CACHE_LLC or similar setting is used.
> > > > +  *
> > > > +  * As a result of this we need to track coherency separately for 
> > > > reads
> > > > +  * and writes, in order to avoid superfluous flushing on 
> > > > shared-LLC
> > > > +  * platforms, for reads.
> > > > +  *
> > > > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > > > +  *
> > > > +  * When reading through the CPU cache, the GPU is still coherent. 
> > > > Note
> > > > +  * that no data has actually been modified here, so it might seem
> > > > +  * strange that we care about this.
> > > > +  *
> > > > +  * As an example, if some object is mapped on the CPU with 
> > > > write-back
> > > > +  * caching, and we read some page, then the cache likely now 
> > > > contains
> > > > +  * the data from that read. At this point the cache and main 
> > > > memory
> > > > +  * match up, so all good. But next the GPU needs to write some 
> > > > data to
> > > > +  * that same page. Now if the @cache_level is I915_CACHE_NONE and 
> > > > the
> > > > +  * the platform doesn't have the shared-LLC, then the GPU will
> > > > +  * effectively skip invalidating the cache(or however that works
> > > > +  * internally) when writing the new value.  This is really bad 
> > > > since the
> > > > +  * GPU has just written some new data to main memory, but the CPU 
> > > > cache
> > > > +  * is still valid and now contains stale data. As a result the 
> > > > next time
> > > > +  * we do a cached read with the CPU, we are rewarded with stale 
> > > > data.
> > > > +  * Likewise if the cache is later flushed, we might be rewarded 
> > > > with
> > > > +  * overwriting main memory with stale data.
> > > > +  *
> > > > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > > +  *
> > > > +  * When writing through the CPU cache, the GPU is still coherent. 
> > > > Note
> > > > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > > +  *
> > > > +  * This is never set when I915_CACHE_NONE is used for 
> > > > @cache_level,
> > > > +  * where instead we have to manually flush the caches after 
> > > > writing
> > > > +  * through the CPU cache. For other cache levels this should be 
> > > > set and
> > > > +  * the object is therefore considered coherent for both reads and 
> > > > writes
> > > > +  * through the CPU cache.
> > >
> > > I don't remember why we have this read vs. write split and this new
> > > documentation doesn't seem to really explain it either.
> >
> > Hmm, I attempted to explain that earlier:
> >
> > * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> > * the CPU cache are always coherent, regardless of the @cache_level. On
> > * snooping based platforms this is not the case, unless the full
> > * I915_CACHE_LLC or similar setting is used.
> > *
> > * As a result of this we need to track coherency separately for reads
> > * and writes, in order to avoid superfluous flushing on shared-LLC
> > * platforms, for reads.
> >
> > So AFAIK it's just because shared-LLC can be coherent for reads, while
> > also not being coherent for writes(CACHE_NONE),
>
> CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
> never heard of any mechanism that would make it only partially coherent.

What do you mean by "comes to LLC", are you talking about HAS_LLC() or
I915_CACHE_LLC?

If you set I915_CACHE_LLC, then yes it is fully coherent for both
HAS_LLC() and HAS_SNOOP().

If you set I915_CACHE_NONE, then reads are still coherent on
HAS_LLC(); for HAS_SNOOP() they are not. Or at least that is the
existing behaviour in the driver AFAIK.
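
I.e. very roughly how the flags end up being derived (simplified
sketch, not the literal driver code; obj/i915 stand for the usual
object and device pointers):

	if (cache_level != I915_CACHE_NONE)
		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
				      I915_BO_CACHE_COHERENT_FOR_WRITE;
	else if (HAS_LLC(i915))
		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
	else
		obj->cache_coherent = 0;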

>
> --
> Ville Syrjälä
> Intel


Re: [PATCH v3 3/5] drm/i915/uapi: reject caching ioctls for discrete

2021-07-13 Thread Kenneth Graunke
On Monday, July 5, 2021 6:53:08 AM PDT Matthew Auld wrote:
> It's a noop on DG1, and in the future when need to support other devices
> which let us control the coherency, then it should be an immutable
> creation time property for the BO. This will likely be controlled
> through a new gem_create_ext extension.
> 
> v2: add some kernel doc for the discrete changes, and document the
> implicit rules
> 
> Suggested-by: Daniel Vetter 
> Signed-off-by: Matthew Auld 
> Cc: Thomas Hellström 
> Cc: Maarten Lankhorst 
> Cc: Tvrtko Ursulin 
> Cc: Jordan Justen 
> Cc: Kenneth Graunke 
> Cc: Jason Ekstrand 
> Cc: Daniel Vetter 
> Cc: Ramalingam C 
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c |  6 +
>  include/uapi/drm/i915_drm.h| 29 +-
>  2 files changed, 34 insertions(+), 1 deletion(-)

This caching ioctl patch is:

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.


Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-13 Thread Ville Syrjälä
On Tue, Jul 13, 2021 at 05:13:37PM +0100, Matthew Auld wrote:
> On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
>  wrote:
> >
> > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > + /**
> > > +  * @cache_coherent:
> > > +  *
> > > +  * Track whether the pages are coherent with the GPU if reading or
> > > +  * writing through the CPU cache.
> > > +  *
> > > +  * This largely depends on the @cache_level, for example if the 
> > > object
> > > +  * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > +  * reads and writes through the CPU cache.
> > > +  *
> > > +  * Note that on platforms with shared-LLC support(HAS_LLC) reads 
> > > through
> > > +  * the CPU cache are always coherent, regardless of the 
> > > @cache_level. On
> > > +  * snooping based platforms this is not the case, unless the full
> > > +  * I915_CACHE_LLC or similar setting is used.
> > > +  *
> > > +  * As a result of this we need to track coherency separately for 
> > > reads
> > > +  * and writes, in order to avoid superfluous flushing on shared-LLC
> > > +  * platforms, for reads.
> > > +  *
> > > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > > +  *
> > > +  * When reading through the CPU cache, the GPU is still coherent. 
> > > Note
> > > +  * that no data has actually been modified here, so it might seem
> > > +  * strange that we care about this.
> > > +  *
> > > +  * As an example, if some object is mapped on the CPU with 
> > > write-back
> > > +  * caching, and we read some page, then the cache likely now 
> > > contains
> > > +  * the data from that read. At this point the cache and main memory
> > > +  * match up, so all good. But next the GPU needs to write some data 
> > > to
> > > +  * that same page. Now if the @cache_level is I915_CACHE_NONE and 
> > > the
> > > +  * the platform doesn't have the shared-LLC, then the GPU will
> > > +  * effectively skip invalidating the cache(or however that works
> > > +  * internally) when writing the new value.  This is really bad 
> > > since the
> > > +  * GPU has just written some new data to main memory, but the CPU 
> > > cache
> > > +  * is still valid and now contains stale data. As a result the next 
> > > time
> > > +  * we do a cached read with the CPU, we are rewarded with stale 
> > > data.
> > > +  * Likewise if the cache is later flushed, we might be rewarded with
> > > +  * overwriting main memory with stale data.
> > > +  *
> > > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > +  *
> > > +  * When writing through the CPU cache, the GPU is still coherent. 
> > > Note
> > > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > +  *
> > > +  * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > +  * where instead we have to manually flush the caches after writing
> > > +  * through the CPU cache. For other cache levels this should be set 
> > > and
> > > +  * the object is therefore considered coherent for both reads and 
> > > writes
> > > +  * through the CPU cache.
> >
> > I don't remember why we have this read vs. write split and this new
> > documentation doesn't seem to really explain it either.
> 
> Hmm, I attempted to explain that earlier:
> 
> * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> * the CPU cache are always coherent, regardless of the @cache_level. On
> * snooping based platforms this is not the case, unless the full
> * I915_CACHE_LLC or similar setting is used.
> *
> * As a result of this we need to track coherency separately for reads
> * and writes, in order to avoid superfluous flushing on shared-LLC
> * platforms, for reads.
> 
> So AFAIK it's just because shared-LLC can be coherent for reads, while
> also not being coherent for writes(CACHE_NONE),

CPU vs. GPU is fully coherent when it comes to LLC. Or at least I've
never heard of any mechanism that would make it only partially coherent.

-- 
Ville Syrjälä
Intel


Re: [PATCH v4 14/18] drm/msm: Don't break exclusive fence ordering

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 7:42 PM Rob Clark  wrote:
> On Tue, Jul 13, 2021 at 9:58 AM Daniel Vetter  wrote:
> >
> > On Tue, Jul 13, 2021 at 6:51 PM Rob Clark  wrote:
> > >
> > > On Mon, Jul 12, 2021 at 1:02 PM Daniel Vetter  
> > > wrote:
> > > >
> > > > There's only one exclusive slot, and we must not break the ordering.
> > > >
> > > > Adding a new exclusive fence drops all previous fences from the
> > > > dma_resv. To avoid violating the signalling order we err on the side of
> > > > over-synchronizing by waiting for the existing fences, even if
> > > > userspace asked us to ignore them.
> > > >
> > > > A better fix would be to us a dma_fence_chain or _array like e.g.
> > > > amdgpu now uses, but
> > > > - msm has a synchronous dma_fence_wait for anything from another
> > > >   context, so doesn't seem to care much,
> > > > - and it probably makes sense to lift this into dma-resv.c code as a
> > > >   proper concept, so that drivers don't have to hack up their own
> > > >   solution each on their own.
> > > >
> > > > v2: Improve commit message per Lucas' suggestion.
> > > >
> > > > Cc: Lucas Stach 
> > > > Signed-off-by: Daniel Vetter 
> > > > Cc: Rob Clark 
> > > > Cc: Sean Paul 
> > > > Cc: linux-arm-...@vger.kernel.org
> > > > Cc: freedr...@lists.freedesktop.org
> > > > ---
> > > >  drivers/gpu/drm/msm/msm_gem_submit.c | 3 ++-
> > > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
> > > > b/drivers/gpu/drm/msm/msm_gem_submit.c
> > > > index b71da71a3dd8..edd0051d849f 100644
> > > > --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> > > > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> > > > @@ -306,7 +306,8 @@ static int submit_fence_sync(struct msm_gem_submit 
> > > > *submit, bool no_implicit)
> > > > return ret;
> > > > }
> > > >
> > > > -   if (no_implicit)
> > > > +   /* exclusive fences must be ordered */
> > > > +   if (no_implicit && !write)
> > > > continue;
> > >
> > > In practice, modern userspace (the kind that is more likely to set the
> > > no-implicit flag on every submit) also sets MSM_SUBMIT_BO_WRITE on
> > > every bo, to shave some cpu overhead so I suppose this would not
> > > really hurt anything
> > >
> > > Do you know if this is covered in any piglit/etc test?
> >
> > You need some command submission, plus buffer sharing with vgem
> > setting it's own exclusive fences, plus checking with dma_buf poll()
> > whether it signals all in the right order. That's pretty low-level, so
> > maybe something in igt, but I haven't typed that. Maybe I need to do
> > that for i915 at least.
>
> ok, you lost me at vgem ;-)
>
> (the vgem vs cache situation on arm is kinda hopeless)

Oh that explains a few things ... I just found out why vgem is failing
for wc buffers on x86 (on some of our less-coherent igpu at least),
and wondered how the heck this works on arm. Sounds like it just
doesn't :-/

On the testcase: You'd never actually check buffer contents, only
fences, so the test would still work.
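
Very roughly (not actual igt code, just the shape of the check):

	/* submit work, share the buffer with vgem, then ... */
	struct pollfd pfd = { .fd = dmabuf_fd, .events = POLLOUT };

	/* poll() on the dma-buf fd only returns once the relevant fences signal */
	igt_assert(poll(&pfd, 1, timeout_ms) == 1);
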
-Daniel
>
> BR,
> -R
>
> > -Daniel
> >
> > > BR,
> > > -R
> > >
> > > >
> > > > ret = msm_gem_sync_object(&msm_obj->base, 
> > > > submit->ring->fctx,
> > > > --
> > > > 2.32.0
> > > >
> >
> >
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [RFC] drm: return int error code from mode_fixup

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 7:14 PM Grace An  wrote:
> When CONFIG_PROVE_LOCKING is defined, the kernel randomly injects
> -EDEADLK errors for all the ww_mutex. This results in
> drm_atomic_get_private_obj_state randomly returning -EDEADLK.
> However, the mode_fixup functions do not propagate these error
> codes and return false, causing the atomic commit to fail with
> -EINVAL instead of retrying.
>
> Change encoder, crtc, and bridge mode_fixup functions to return
> an int instead of a boolean to indicate success or failure. If
> any of these functions fail, the mode_fixup function now returns
> the provided integer error code instead of -EINVAL.
>
> This change needs modifications across drivers, but before submitting
> the entire change, we want to get feedback on this RFC.
>
> Signed-off-by: Grace An 

Why don't you just use the various atomic_check hooks we have for
this? There you get passed the state and everything, have a full int
return value, and things actually work.

->mode_fixup is for compatibility with legacy crtc modeset helpers
from the pre-atomic times. If the kerneldoc isn't clear yet, please do
a patch to fix that up so that @mode_fixup points at the relevant
@atomic_check as the recommended function.
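
E.g. (hand-waving sketch, not a real driver, foo_obj is made up) an
atomic_check hook just passes the errno straight through, so the
-EDEADLK backoff/retry keeps working without touching mode_fixup:

static int foo_encoder_atomic_check(struct drm_encoder *encoder,
				    struct drm_crtc_state *crtc_state,
				    struct drm_connector_state *conn_state)
{
	struct drm_private_state *priv;

	priv = drm_atomic_get_private_obj_state(crtc_state->state, &foo_obj);
	if (IS_ERR(priv))
		return PTR_ERR(priv);	/* -EDEADLK propagates, core retries */

	return 0;
}
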
-Daniel

> ---
>  drivers/gpu/drm/drm_atomic_helper.c  | 8 
>  drivers/gpu/drm/drm_bridge.c | 4 ++--
>  include/drm/drm_bridge.h | 2 +-
>  include/drm/drm_modeset_helper_vtables.h | 4 ++--
>  4 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c 
> b/drivers/gpu/drm/drm_atomic_helper.c
> index f2b3e28..d75f09a 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -457,10 +457,10 @@ mode_fixup(struct drm_atomic_state *state)
> } else if (funcs && funcs->mode_fixup) {
> ret = funcs->mode_fixup(encoder, 
> &new_crtc_state->mode,
> 
> &new_crtc_state->adjusted_mode);
> -   if (!ret) {
> +   if (ret) {
> DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] fixup 
> failed\n",
>  encoder->base.id, 
> encoder->name);
> -   return -EINVAL;
> +   return ret;
> }
> }
> }
> @@ -481,10 +481,10 @@ mode_fixup(struct drm_atomic_state *state)
>
> ret = funcs->mode_fixup(crtc, &new_crtc_state->mode,
> &new_crtc_state->adjusted_mode);
> -   if (!ret) {
> +   if (ret) {
> DRM_DEBUG_ATOMIC("[CRTC:%d:%s] fixup failed\n",
>  crtc->base.id, crtc->name);
> -   return -EINVAL;
> +   return ret;
> }
> }
>
> diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
> index 64f0eff..3ad16b5 100644
> --- a/drivers/gpu/drm/drm_bridge.c
> +++ b/drivers/gpu/drm/drm_bridge.c
> @@ -736,9 +736,9 @@ static int drm_atomic_bridge_check(struct drm_bridge 
> *bridge,
> if (ret)
> return ret;
> } else if (bridge->funcs->mode_fixup) {
> -   if (!bridge->funcs->mode_fixup(bridge, &crtc_state->mode,
> +   if (bridge->funcs->mode_fixup(bridge, &crtc_state->mode,
>    &crtc_state->adjusted_mode))
> -   return -EINVAL;
> +   return ret;
> }
>
> return 0;
> diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
> index 2195daa..5d02dfc 100644
> --- a/include/drm/drm_bridge.h
> +++ b/include/drm/drm_bridge.h
> @@ -153,7 +153,7 @@ struct drm_bridge_funcs {
>  * True if an acceptable configuration is possible, false if the 
> modeset
>  * operation should be rejected.
>  */
> -   bool (*mode_fixup)(struct drm_bridge *bridge,
> +   int (*mode_fixup)(struct drm_bridge *bridge,
>const struct drm_display_mode *mode,
>struct drm_display_mode *adjusted_mode);
> /**
> diff --git a/include/drm/drm_modeset_helper_vtables.h 
> b/include/drm/drm_modeset_helper_vtables.h
> index f3a4b47..e305c97 100644
> --- a/include/drm/drm_modeset_helper_vtables.h
> +++ b/include/drm/drm_modeset_helper_vtables.h
> @@ -184,7 +184,7 @@ struct drm_crtc_helper_funcs {
>  * True if an acceptable configuration is possible, false if the 
> modeset
>  * operation should be rejected.
>  */
> -   bool (*mode_fixup)(struct drm_crtc *crtc,
> +   int (*mode_fixup)(struct drm_crtc *crtc,
>const struct drm_display_mode *mode,
>struct drm_display_mode *adjusted_mode);
>
> @@ -599,7 +599,7 @@ struct 

Re: [PATCH v4 14/18] drm/msm: Don't break exclusive fence ordering

2021-07-13 Thread Rob Clark
On Tue, Jul 13, 2021 at 9:58 AM Daniel Vetter  wrote:
>
> On Tue, Jul 13, 2021 at 6:51 PM Rob Clark  wrote:
> >
> > On Mon, Jul 12, 2021 at 1:02 PM Daniel Vetter  
> > wrote:
> > >
> > > There's only one exclusive slot, and we must not break the ordering.
> > >
> > > Adding a new exclusive fence drops all previous fences from the
> > > dma_resv. To avoid violating the signalling order we err on the side of
> > > over-synchronizing by waiting for the existing fences, even if
> > > userspace asked us to ignore them.
> > >
> > > A better fix would be to use a dma_fence_chain or _array like e.g.
> > > amdgpu now uses, but
> > > - msm has a synchronous dma_fence_wait for anything from another
> > >   context, so doesn't seem to care much,
> > > - and it probably makes sense to lift this into dma-resv.c code as a
> > >   proper concept, so that drivers don't have to hack up their own
> > >   solution each on their own.
> > >
> > > v2: Improve commit message per Lucas' suggestion.
> > >
> > > Cc: Lucas Stach 
> > > Signed-off-by: Daniel Vetter 
> > > Cc: Rob Clark 
> > > Cc: Sean Paul 
> > > Cc: linux-arm-...@vger.kernel.org
> > > Cc: freedr...@lists.freedesktop.org
> > > ---
> > >  drivers/gpu/drm/msm/msm_gem_submit.c | 3 ++-
> > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
> > > b/drivers/gpu/drm/msm/msm_gem_submit.c
> > > index b71da71a3dd8..edd0051d849f 100644
> > > --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> > > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> > > @@ -306,7 +306,8 @@ static int submit_fence_sync(struct msm_gem_submit 
> > > *submit, bool no_implicit)
> > > return ret;
> > > }
> > >
> > > -   if (no_implicit)
> > > +   /* exclusive fences must be ordered */
> > > +   if (no_implicit && !write)
> > > continue;
> >
> > In practice, modern userspace (the kind that is more likely to set the
> > no-implicit flag on every submit) also sets MSM_SUBMIT_BO_WRITE on
> > every bo, to shave some cpu overhead so I suppose this would not
> > really hurt anything
> >
> > Do you know if this is covered in any piglit/etc test?
>
> You need some command submission, plus buffer sharing with vgem
> setting its own exclusive fences, plus checking with dma_buf poll()
> whether it signals all in the right order. That's pretty low-level, so
> maybe something in igt, but I haven't typed that. Maybe I need to do
> that for i915 at least.

ok, you lost me at vgem ;-)

(the vgem vs cache situation on arm is kinda hopeless)

BR,
-R

> -Daniel
>
> > BR,
> > -R
> >
> > >
> > > ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx,
> > > --
> > > 2.32.0
> > >
>
>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch


Re: [PATCH v3 0/2] allow simple{fb, drm} drivers to be used on non-x86 EFI platforms

2021-07-13 Thread Javier Martinez Canillas
On 6/25/21 3:09 PM, Javier Martinez Canillas wrote:
> The simplefb and simpledrm drivers match against a "simple-framebuffer"
> device, but for aarch64 this is only registered when using Device Trees
> and there's a node with a "simple-framebuffer" compatible string.
> 
> There is no code to register a "simple-framebuffer" platform device when
> using EFI instead. In fact, the only platform device that's registered in
> this case is an "efi-framebuffer", which means that the efifb driver is
> the only driver supported to have an early console with EFI on aarch64.
> 
> The x86 architecture platform has Generic System Framebuffers (sysfb)
> support, which registers a system framebuffer platform device. It either
> registers a "simple-framebuffer" for the simple{fb,drm} drivers or legacy
> VGA/EFI FB devices for the vgafb/efifb drivers.
> 
> The sysfb is generic enough to be reused by other architectures and can be
> moved out of the arch/x86 directory to drivers/firmware, allowing the EFI
> logic used by non-x86 architectures to be folded into sysfb as well.
> 

Any more comments on this series? It would be nice for this to land so the
simpledrm driver could be used on aarch64 EFI systems as well.

The patches have already been acked by x86 and DRM folks.

Best regards,
-- 
Javier Martinez Canillas
Linux Engineering
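
For reviewers less familiar with sysfb: what it boils down to is registering
a platform device that simpledrm/simplefb can bind to.  A heavily simplified,
illustrative sketch (not the actual sysfb code; the address, size and pixel
format would really come from the firmware via screen_info or the EFI GOP):

#include <linux/err.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/platform_data/simplefb.h>

static int __init register_fw_framebuffer(u64 base, u64 size, u32 width,
					   u32 height, u32 stride)
{
	struct simplefb_platform_data pd = {
		.width = width,
		.height = height,
		.stride = stride,
		.format = "a8r8g8b8",	/* assumed firmware pixel format */
	};
	struct resource res = DEFINE_RES_MEM(base, size);
	struct platform_device *pdev;

	pdev = platform_device_register_resndata(NULL, "simple-framebuffer", 0,
						 &res, 1, &pd, sizeof(pd));
	return PTR_ERR_OR_ZERO(pdev);
}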



Re: [PATCH v4 14/18] drm/msm: Don't break exclusive fence ordering

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 6:51 PM Rob Clark  wrote:
>
> On Mon, Jul 12, 2021 at 1:02 PM Daniel Vetter  wrote:
> >
> > There's only one exclusive slot, and we must not break the ordering.
> >
> > Adding a new exclusive fence drops all previous fences from the
> > dma_resv. To avoid violating the signalling order we err on the side of
> > over-synchronizing by waiting for the existing fences, even if
> > userspace asked us to ignore them.
> >
> > A better fix would be to use a dma_fence_chain or _array like e.g.
> > amdgpu now uses, but
> > - msm has a synchronous dma_fence_wait for anything from another
> >   context, so doesn't seem to care much,
> > - and it probably makes sense to lift this into dma-resv.c code as a
> >   proper concept, so that drivers don't have to hack up their own
> >   solution each on their own.
> >
> > v2: Improve commit message per Lucas' suggestion.
> >
> > Cc: Lucas Stach 
> > Signed-off-by: Daniel Vetter 
> > Cc: Rob Clark 
> > Cc: Sean Paul 
> > Cc: linux-arm-...@vger.kernel.org
> > Cc: freedr...@lists.freedesktop.org
> > ---
> >  drivers/gpu/drm/msm/msm_gem_submit.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
> > b/drivers/gpu/drm/msm/msm_gem_submit.c
> > index b71da71a3dd8..edd0051d849f 100644
> > --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> > @@ -306,7 +306,8 @@ static int submit_fence_sync(struct msm_gem_submit 
> > *submit, bool no_implicit)
> > return ret;
> > }
> >
> > -   if (no_implicit)
> > +   /* exclusive fences must be ordered */
> > +   if (no_implicit && !write)
> > continue;
>
> In practice, modern userspace (the kind that is more likely to set the
> no-implicit flag on every submit) also sets MSM_SUBMIT_BO_WRITE on
> every bo, to shave some cpu overhead so I suppose this would not
> really hurt anything
>
> Do you know if this is covered in any piglit/etc test?

You need some command submission, plus buffer sharing with vgem
setting its own exclusive fences, plus checking with dma_buf poll()
whether it signals all in the right order. That's pretty low-level, so
maybe something in igt, but I haven't typed that. Maybe I need to do
that for i915 at least.
-Daniel

> BR,
> -R
>
> >
> > ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx,
> > --
> > 2.32.0
> >



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH v4 14/18] drm/msm: Don't break exclusive fence ordering

2021-07-13 Thread Rob Clark
On Mon, Jul 12, 2021 at 1:02 PM Daniel Vetter  wrote:
>
> There's only one exclusive slot, and we must not break the ordering.
>
> Adding a new exclusive fence drops all previous fences from the
> dma_resv. To avoid violating the signalling order we err on the side of
> over-synchronizing by waiting for the existing fences, even if
> userspace asked us to ignore them.
>
> A better fix would be to use a dma_fence_chain or _array like e.g.
> amdgpu now uses, but
> - msm has a synchronous dma_fence_wait for anything from another
>   context, so doesn't seem to care much,
> - and it probably makes sense to lift this into dma-resv.c code as a
>   proper concept, so that drivers don't have to hack up their own
>   solution each on their own.
>
> v2: Improve commit message per Lucas' suggestion.
>
> Cc: Lucas Stach 
> Signed-off-by: Daniel Vetter 
> Cc: Rob Clark 
> Cc: Sean Paul 
> Cc: linux-arm-...@vger.kernel.org
> Cc: freedr...@lists.freedesktop.org
> ---
>  drivers/gpu/drm/msm/msm_gem_submit.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c 
> b/drivers/gpu/drm/msm/msm_gem_submit.c
> index b71da71a3dd8..edd0051d849f 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -306,7 +306,8 @@ static int submit_fence_sync(struct msm_gem_submit 
> *submit, bool no_implicit)
> return ret;
> }
>
> -   if (no_implicit)
> +   /* exclusive fences must be ordered */
> +   if (no_implicit && !write)
> continue;

In practice, modern userspace (the kind that is more likely to set the
no-implicit flag on every submit) also sets MSM_SUBMIT_BO_WRITE on
every bo, to shave some cpu overhead so I suppose this would not
really hurt anything

Do you know if this is covered in any piglit/etc test?

BR,
-R

>
> ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx,
> --
> 2.32.0
>


Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 6:14 PM Matthew Auld
 wrote:
> On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
>  wrote:
> >
> > On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > > + /**
> > > +  * @cache_coherent:
> > > +  *
> > > +  * Track whether the pages are coherent with the GPU if reading or
> > > +  * writing through the CPU cache.
> > > +  *
> > > +  * This largely depends on the @cache_level, for example if the 
> > > object
> > > +  * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > > +  * reads and writes through the CPU cache.
> > > +  *
> > > +  * Note that on platforms with shared-LLC support(HAS_LLC) reads 
> > > through
> > > +  * the CPU cache are always coherent, regardless of the 
> > > @cache_level. On
> > > +  * snooping based platforms this is not the case, unless the full
> > > +  * I915_CACHE_LLC or similar setting is used.
> > > +  *
> > > +  * As a result of this we need to track coherency separately for 
> > > reads
> > > +  * and writes, in order to avoid superfluous flushing on shared-LLC
> > > +  * platforms, for reads.
> > > +  *
> > > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > > +  *
> > > +  * When reading through the CPU cache, the GPU is still coherent. 
> > > Note
> > > +  * that no data has actually been modified here, so it might seem
> > > +  * strange that we care about this.
> > > +  *
> > > +  * As an example, if some object is mapped on the CPU with 
> > > write-back
> > > +  * caching, and we read some page, then the cache likely now 
> > > contains
> > > +  * the data from that read. At this point the cache and main memory
> > > +  * match up, so all good. But next the GPU needs to write some data 
> > > to
> > > +  * that same page. Now if the @cache_level is I915_CACHE_NONE and 
> > > the
> > > +  * the platform doesn't have the shared-LLC, then the GPU will
> > > +  * effectively skip invalidating the cache(or however that works
> > > +  * internally) when writing the new value.  This is really bad 
> > > since the
> > > +  * GPU has just written some new data to main memory, but the CPU 
> > > cache
> > > +  * is still valid and now contains stale data. As a result the next 
> > > time
> > > +  * we do a cached read with the CPU, we are rewarded with stale 
> > > data.
> > > +  * Likewise if the cache is later flushed, we might be rewarded with
> > > +  * overwriting main memory with stale data.
> > > +  *
> > > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > > +  *
> > > +  * When writing through the CPU cache, the GPU is still coherent. 
> > > Note
> > > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > > +  *
> > > +  * This is never set when I915_CACHE_NONE is used for @cache_level,
> > > +  * where instead we have to manually flush the caches after writing
> > > +  * through the CPU cache. For other cache levels this should be set 
> > > and
> > > +  * the object is therefore considered coherent for both reads and 
> > > writes
> > > +  * through the CPU cache.
> >
> > I don't remember why we have this read vs. write split and this new
> > documentation doesn't seem to really explain it either.
>
> Hmm, I attempted to explain that earlier:
>
> * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> * the CPU cache are always coherent, regardless of the @cache_level. On
> * snooping based platforms this is not the case, unless the full
> * I915_CACHE_LLC or similar setting is used.
> *
> * As a result of this we need to track coherency separately for reads
> * and writes, in order to avoid superfluous flushing on shared-LLC
> * platforms, for reads.
>
> So AFAIK it's just because shared-LLC can be coherent for reads, while
> also not being coherent for writes(CACHE_NONE), so being able to track
> each separately is kind of needed to avoid unnecessary flushing for
> the read cases i.e simple boolean for coherent vs non-coherent is not
> enough.
>
> I can try to reword things to make that more clear.

Maybe highlight the security aspect a bit more: When reads are always
coherent, we don't have to force the clflush. If reads are not
coherent we must ensure that the clflush has finished before userspace
can get at the backing storage, like writing ptes and similar things.
Writes otoh can only result in userspace eating cacheline corruption
if it races against the kernel (by e.g. trying to predict where we'll
bind a buffer and issuing gpu access to that location before the
buffer is actually bound from some other engine in parallel with an
execbuf that binds the buffer).

Atm we don't do a great job with that, but that's something that I
think is getting looked into.
-Daniel
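
To make the read/write split a bit more concrete, a toy restatement of just
the rules spelled out in the kerneldoc above; this is not the i915 flushing
code, and the flag and helper names are invented for illustration:

/* Toy restatement of the documented rules -- NOT the i915 code paths. */
#include <linux/types.h>

#define BO_CACHE_COHERENT_FOR_READ	(1 << 0)
#define BO_CACHE_COHERENT_FOR_WRITE	(1 << 1)

/* GPU is about to write: stale CPU cachelines must not survive it. */
static bool need_clflush_before_gpu_write(unsigned int obj_flags)
{
	return !(obj_flags & BO_CACHE_COHERENT_FOR_READ);
}

/* CPU wrote through its cache: data must be pushed out for the GPU. */
static bool need_clflush_after_cpu_write(unsigned int obj_flags)
{
	return !(obj_flags & BO_CACHE_COHERENT_FOR_WRITE);
}

On shared-LLC parts reads through the CPU cache stay coherent regardless of
@cache_level, so the read-side flag is set and the read-triggered flush can
be skipped, which is the "avoid superfluous flushing" point in the comment.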

> > Is it for optimizing some display related case where we can omit the
> > invalidates but still have to do the writeback to keep the display engine happy?

Re: [PATCH] drm/stm: dsi: compute the transition time from LP to HS and back

2021-07-13 Thread Philippe CORNU

Hi Antonio,

On 7/13/21 4:49 PM, Antonio Borneo wrote:

The driver uses a conservative set of hardcoded values for the
maximum time delay of the transitions between LP and HS, for both
the data and clock lanes.

By using the info in STM32MP157 datasheet, valid also for other ST
devices, compute the actual delay from the lane's bps.

Signed-off-by: Antonio Borneo 
---
To: Yannick Fertre 
To: Philippe Cornu 
To: Benjamin Gaignard 
To: David Airlie 
To: Daniel Vetter 
To: Maxime Coquelin 
To: Alexandre Torgue 
To: Raphael Gallais-Pou 
To: dri-devel@lists.freedesktop.org
To: linux-st...@st-md-mailman.stormreply.com
To: linux-arm-ker...@lists.infradead.org
Cc: linux-ker...@vger.kernel.org

  drivers/gpu/drm/stm/dw_mipi_dsi-stm.c | 17 +
  1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c 
b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
index 8399d337589d..32cb41b2202f 100644
--- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
+++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
@@ -309,14 +309,23 @@ dw_mipi_dsi_get_lane_mbps(void *priv_data, const struct 
drm_display_mode *mode,
return 0;
  }
  
+#define DSI_PHY_DELAY(fp, vp, mbps) DIV_ROUND_UP((fp) * (mbps) + 1000 * (vp), 8000)

+
  static int
  dw_mipi_dsi_phy_get_timing(void *priv_data, unsigned int lane_mbps,
   struct dw_mipi_dsi_dphy_timing *timing)
  {
-   timing->clk_hs2lp = 0x40;
-   timing->clk_lp2hs = 0x40;
-   timing->data_hs2lp = 0x40;
-   timing->data_lp2hs = 0x40;
+   /*
+* From STM32MP157 datasheet, valid for STM32F469, STM32F7x9, STM32H747
+* phy_clkhs2lp_time = (272+136*UI)/(8*UI)
+* phy_clklp2hs_time = (512+40*UI)/(8*UI)
+* phy_hs2lp_time = (192+64*UI)/(8*UI)
+* phy_lp2hs_time = (256+32*UI)/(8*UI)
+*/
+   timing->clk_hs2lp = DSI_PHY_DELAY(272, 136, lane_mbps);
+   timing->clk_lp2hs = DSI_PHY_DELAY(512, 40, lane_mbps);
+   timing->data_hs2lp = DSI_PHY_DELAY(192, 64, lane_mbps);
+   timing->data_lp2hs = DSI_PHY_DELAY(256, 32, lane_mbps);


Many thanks for your patch.

Reviewed-by: Philippe Cornu 
Acked-by: Philippe Cornu 

I will apply it on drm-misc-next early next week,

Philippe :-)

  
  	return 0;

  }

base-commit: 35d283658a6196b2057be562096610c6793e1219



Re: [PATCH v4 02/18] drm/sched: Barriers are needed for entity->last_scheduled

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 6:11 PM Andrey Grodzovsky
 wrote:
> On 2021-07-13 5:10 a.m., Daniel Vetter wrote:
> > On Tue, Jul 13, 2021 at 9:25 AM Christian König
> >  wrote:
> >> Am 13.07.21 um 08:50 schrieb Daniel Vetter:
> >>> On Tue, Jul 13, 2021 at 8:35 AM Christian König
> >>>  wrote:
>  Am 12.07.21 um 19:53 schrieb Daniel Vetter:
> > It might be good enough on x86 with just READ_ONCE, but the write side
> > should then at least be WRITE_ONCE because x86 has total store order.
> >
> > It's definitely not enough on arm.
> >
> > Fix this properly, which means
> > - explain the need for the barrier in both places
> > - point at the other side in each comment
> >
> > Also pull out the !sched_list case as the first check, so that the
> > code flow is clearer.
> >
> > While at it sprinkle some comments around because it was very
> > non-obvious to me what's actually going on here and why.
> >
> > Note that we really need full barriers here, at first I thought
> > store-release and load-acquire on ->last_scheduled would be enough,
> > but we actually require ordering between that and the queue state.
> >
> > v2: Put smp_rmb() in the right place and fix up comment (Andrey)
> >
> > Signed-off-by: Daniel Vetter 
> > Cc: "Christian König" 
> > Cc: Steven Price 
> > Cc: Daniel Vetter 
> > Cc: Andrey Grodzovsky 
> > Cc: Lee Jones 
> > Cc: Boris Brezillon 
> > ---
> > drivers/gpu/drm/scheduler/sched_entity.c | 27 
> > ++--
> > 1 file changed, 25 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
> > b/drivers/gpu/drm/scheduler/sched_entity.c
> > index f7347c284886..89e3f6eaf519 100644
> > --- a/drivers/gpu/drm/scheduler/sched_entity.c
> > +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> > @@ -439,8 +439,16 @@ struct drm_sched_job 
> > *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> > dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
> >
> > dma_fence_put(entity->last_scheduled);
> > +
> > entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);
> >
> > + /*
> > +  * If the queue is empty we allow drm_sched_entity_select_rq() to
> > +  * locklessly access ->last_scheduled. This only works if we set the
> > +  * pointer before we dequeue and if we add a write barrier here.
> > +  */
> > + smp_wmb();
> > +
>  Again, conceptual those barriers should be part of the spsc_queue
>  container and not externally.
> >>> That would be extremely unusual api. Let's assume that your queue is
> >>> very dumb, and protected by a simple lock. That's about the maximum
> >>> any user could expect.
> >>>
> >>> But then you still need barriers here, because linux locks (spinlock,
> >>> mutex) are defined to be one-way barriers: Stuff that's inside is
> >>> guaranteed to be done inside, but stuff outside of the locked region
> >>> can leak in. They're load-acquire/store-release barriers. So not good
> >>> enough.
> >>>
> >>> You really need to have barriers here, and they really all need to be
> >>> documented properly. And yes that's a shit-ton of work in drm/sched,
> >>> because it's full of yolo lockless stuff.
> >>>
> >>> The other case you could make is that this works like a wakeup queue,
> >>> or similar. The rules there are:
> >>> - wake_up (i.e. pushing something into the queue) is a store-release 
> >>> barrier
> >>> - the waked up (i.e. popping an entry) is a load acquire barrier
> >>> Which is obviously needed because otherwise you don't have coherency
> >>> for the data queued up. And again not the barriers you're looking for
> >>> here.
> >> Exactly that was the idea, yes.
> >>
> >>> Either way, we'd still need the comments, because it's still lockless
> >>> trickery, and every single one of that needs to have a comment on both
> >>> sides to explain what's going on.
> >>>
> >>> Essentially replace spsc_queue with an llist underneath, and that's
> >>> the amount of barriers a data structure should provide. Anything else
> >>> is asking your datastructure to paper over bugs in your users.
> >>>
> >>> This is similar to how atomic_t is by default completely unordered,
> >>> and users need to add barriers as needed, with comments.
> >> My main problem is as always that kernel atomics work different than
> >> userspace atomics.
> >>
> >>> I think this is all to make sure people don't just write lockless 
> >>> algorithms
> >>> because it's a cool idea, but are forced to think this all through.
> >>> Which seems to not have happened very consistently for drm/sched, so I
> >>> guess needs to be fixed.
> >> Well at least initially that was all perfectly thought through. The
> >> problem is nobody is really maintaining that stuff.
> >>
> 

Re: [PATCH] drm/stm: ltdc: Silence -EPROBE_DEFER till bridge attached

2021-07-13 Thread Philippe CORNU

Hi Jagan,

On 7/4/21 3:59 PM, Jagan Teki wrote:

Since dw-mipi-dsi supports all possible ways to find the DSI
devices, it can take multiple iterations for ltdc to find
all components attached to the DSI bridge.

The current ltdc driver fails to find the endpoint because
it returns -EINVAL on the first iteration itself. This leads
to the following error:

[    3.099289] [drm:ltdc_load] *ERROR* init encoder endpoint 0

So, check the return value and clean up the encoder only if it is
not -EPROBE_DEFER. This makes all components attached to the DSI
bridge get found properly.

Signed-off-by: Jagan Teki 
---
  drivers/gpu/drm/stm/ltdc.c | 8 +---
  1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index 08b71248044d..95e983d3ffb5 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -1122,8 +1122,9 @@ static int ltdc_encoder_init(struct drm_device *ddev, 
struct drm_bridge *bridge)
  
  	ret = drm_bridge_attach(encoder, bridge, NULL, 0);

if (ret) {
-   drm_encoder_cleanup(encoder);
-   return -EINVAL;
+   if (ret != -EPROBE_DEFER)
+   drm_encoder_cleanup(encoder);


Many thanks for your patch.

This means that we are counting on the future success of the deferred 
probe because we do not clean the encoder...

However, Yannick gave his "Tested-by" and this patch seems useful so

Acked-by: Philippe Cornu 

I will merge it friday or early next week,

Thank you
Philippe :-)
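
As a side note (a sketch only, not a request to respin): the
"ret != -EPROBE_DEFER" special-casing around the error print in the second
hunk below is the kind of thing dev_err_probe() exists for; something along
these lines would log real errors and stay quiet (while recording the
deferral reason) for -EPROBE_DEFER.  Whether that fits the DRM_ERROR logging
convention here is of course a maintainer call:

/* Fragment meant to live in ltdc.c next to the quoted code. */
static int ltdc_encoder_init_logged(struct drm_device *ddev,
				    struct drm_bridge *bridge, int endpoint)
{
	int ret = ltdc_encoder_init(ddev, bridge);

	if (ret)
		return dev_err_probe(ddev->dev, ret,
				     "init encoder endpoint %d\n", endpoint);
	return 0;
}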



+   return ret;
}
  
  	DRM_DEBUG_DRIVER("Bridge encoder:%d created\n", encoder->base.id);

@@ -1266,7 +1267,8 @@ int ltdc_load(struct drm_device *ddev)
if (bridge) {
ret = ltdc_encoder_init(ddev, bridge);
if (ret) {
-   DRM_ERROR("init encoder endpoint %d\n", i);
+   if (ret != -EPROBE_DEFER)
+   DRM_ERROR("init encoder endpoint %d\n", 
i);
goto err;
}
}



Re: [Freedreno] [PATCH] drm/msm/dsi: add support for dsi test pattern generator

2021-07-13 Thread Rob Clark
On Tue, Jun 29, 2021 at 12:04 PM Abhinav Kumar  wrote:
>
> During board bringups it's useful to have a DSI test pattern
> generator to isolate a DPU vs a DSI issue and focus on the relevant
> hardware block.
>
> To facilitate this, add an API which triggers the DSI controller
> test pattern. The expected output is a rectangular checkered pattern.
>
> This has been validated on a single DSI video mode panel by calling it
> right after drm_panel_enable(), which is also the ideal location to use
> this, as the DSI host and the panel have been initialized by then.
>
> Further validation on dual DSI and command mode panel is pending.
> If there are any fix ups needed for those, it shall be applied on top
> of this change.
>
> Signed-off-by: Abhinav Kumar 
> ---
>  drivers/gpu/drm/msm/dsi/dsi.h |  3 ++
>  drivers/gpu/drm/msm/dsi/dsi.xml.h |  9 ++
>  drivers/gpu/drm/msm/dsi/dsi_host.c| 53 
> +++
>  drivers/gpu/drm/msm/dsi/dsi_manager.c | 13 +
>  4 files changed, 78 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h
> index 9b8e9b0..663ccbd 100644
> --- a/drivers/gpu/drm/msm/dsi/dsi.h
> +++ b/drivers/gpu/drm/msm/dsi/dsi.h
> @@ -84,6 +84,7 @@ void msm_dsi_manager_setup_encoder(int id);
>  int msm_dsi_manager_register(struct msm_dsi *msm_dsi);
>  void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi);
>  bool msm_dsi_manager_validate_current_config(u8 id);
> +void msm_dsi_manager_tpg_enable(void);
>
>  /* msm dsi */
>  static inline bool msm_dsi_device_connected(struct msm_dsi *msm_dsi)
> @@ -148,6 +149,8 @@ int dsi_clk_init_6g_v2(struct msm_dsi_host *msm_host);
>  int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_dual_dsi);
>  int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_dual_dsi);
>  void msm_dsi_host_snapshot(struct msm_disp_state *disp_state, struct 
> mipi_dsi_host *host);
> +void msm_dsi_host_test_pattern_en(struct mipi_dsi_host *host);
> +
>  /* dsi phy */
>  struct msm_dsi_phy;
>  struct msm_dsi_phy_shared_timings {
> diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h 
> b/drivers/gpu/drm/msm/dsi/dsi.xml.h
> index eadbcc7..f7dcf49 100644
> --- a/drivers/gpu/drm/msm/dsi/dsi.xml.h
> +++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h
> @@ -624,6 +624,15 @@ static inline uint32_t DSI_RDBK_DATA_CTRL_COUNT(uint32_t 
> val)
>  #define REG_DSI_VERSION
> 0x01f0
>  #define DSI_VERSION_MAJOR__MASK
> 0xff00
>  #define DSI_VERSION_MAJOR__SHIFT   24
> +
> +#define REG_DSI_TEST_PATTERN_GEN_VIDEO_INIT_VAL 0x0160
> +#define REG_DSI_TPG_MAIN_CONTROL   0x0198
> +#define REG_DSI_TPG_VIDEO_CONFIG   0x01a0
> +#define REG_DSI_TEST_PATTERN_GEN_CTRL  0x0158
> +#define REG_DSI_TEST_PATTERN_GEN_CMD_MDP_INIT_VAL0 0x0168
> +#define REG_DSI_TEST_PATTERN_GEN_CMD_STREAM0_TRIGGER   0x0180
> +#define REG_DSI_TPG_MAIN_CONTROL2  0x019c

We should add these in dsi.xml (either in
mesa.git/src/freedreno/registers/dsi/dsi.xml or in
envytools.git/registers/dsi/dsi.xml) and regenerate dsi.xml.h

And if possible, it would be nice to define the bitfields instead of
just open-coding the register values

BR,
-R
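
To illustrate the "define the bitfields" request with something generic
(msm's dsi.xml.h is generated from the rules-ng-ng XML, so the real result
would look different; the field names and bit positions below are invented,
not the actual DSI TPG layout):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Invented example fields, standing in for whatever dsi.xml would define. */
#define DSI_TPG_CTRL_VIDEO_PATTERN_SEL	GENMASK(5, 4)
#define DSI_TPG_CTRL_CMD_PATTERN_SEL	GENMASK(9, 8)

static u32 dsi_tpg_set_video_pattern(u32 reg, u32 pattern)
{
	reg &= ~DSI_TPG_CTRL_VIDEO_PATTERN_SEL;
	reg |= FIELD_PREP(DSI_TPG_CTRL_VIDEO_PATTERN_SEL, pattern);
	return reg;
}

That keeps the intent of "reg |= (0x3 << 4)" readable without magic shifts.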

> +
>  static inline uint32_t DSI_VERSION_MAJOR(uint32_t val)
>  {
> return ((val) << DSI_VERSION_MAJOR__SHIFT) & DSI_VERSION_MAJOR__MASK;
> diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c 
> b/drivers/gpu/drm/msm/dsi/dsi_host.c
> index ed504fe..24d44b0 100644
> --- a/drivers/gpu/drm/msm/dsi/dsi_host.c
> +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
> @@ -2495,3 +2495,56 @@ void msm_dsi_host_snapshot(struct msm_disp_state 
> *disp_state, struct mipi_dsi_ho
>
> pm_runtime_put_sync(&msm_host->pdev->dev);
>  }
> +
> +static void msm_dsi_host_video_test_pattern_setup(struct msm_dsi_host 
> *msm_host)
> +{
> +   u32 reg;
> +
> +   reg = dsi_read(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL);
> +
> +   dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_VIDEO_INIT_VAL, 0xff);
> +   /* draw checkered rectangle pattern */
> +   dsi_write(msm_host, REG_DSI_TPG_MAIN_CONTROL, 0x100);
> +   /* use 24-bit RGB test pattern */
> +   dsi_write(msm_host, REG_DSI_TPG_VIDEO_CONFIG, 0x5);
> +
> +   reg |= (0x3 << 4);
> +   dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL, reg);
> +
> +   DBG("Video test pattern setup done\n");
> +}
> +
> +static void msm_dsi_host_cmd_test_pattern_setup(struct msm_dsi_host 
> *msm_host)
> +{
> +   u32 reg;
> +
> +   reg = dsi_read(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL);
> +
> +   dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CMD_MDP_INIT_VAL0, 0xff);
> +
> +   reg |= (0x3 << 0x8);
> +   dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL, reg);
> +   /* draw checkered rectangle pattern */
> +   

[PATCH] drm/stm: dsi: compute the transition time from LP to HS and back

2021-07-13 Thread Antonio Borneo
The driver uses a conservative set of hardcoded values for the
maximum time delay of the transitions between LP and HS, for both
the data and clock lanes.

By using the info in STM32MP157 datasheet, valid also for other ST
devices, compute the actual delay from the lane's bps.

Signed-off-by: Antonio Borneo 
---
To: Yannick Fertre 
To: Philippe Cornu 
To: Benjamin Gaignard 
To: David Airlie 
To: Daniel Vetter 
To: Maxime Coquelin 
To: Alexandre Torgue 
To: Raphael Gallais-Pou 
To: dri-devel@lists.freedesktop.org
To: linux-st...@st-md-mailman.stormreply.com
To: linux-arm-ker...@lists.infradead.org
Cc: linux-ker...@vger.kernel.org

 drivers/gpu/drm/stm/dw_mipi_dsi-stm.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c 
b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
index 8399d337589d..32cb41b2202f 100644
--- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
+++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
@@ -309,14 +309,23 @@ dw_mipi_dsi_get_lane_mbps(void *priv_data, const struct 
drm_display_mode *mode,
return 0;
 }
 
+#define DSI_PHY_DELAY(fp, vp, mbps) DIV_ROUND_UP((fp) * (mbps) + 1000 * (vp), 
8000)
+
 static int
 dw_mipi_dsi_phy_get_timing(void *priv_data, unsigned int lane_mbps,
   struct dw_mipi_dsi_dphy_timing *timing)
 {
-   timing->clk_hs2lp = 0x40;
-   timing->clk_lp2hs = 0x40;
-   timing->data_hs2lp = 0x40;
-   timing->data_lp2hs = 0x40;
+   /*
+* From STM32MP157 datasheet, valid for STM32F469, STM32F7x9, STM32H747
+* phy_clkhs2lp_time = (272+136*UI)/(8*UI)
+* phy_clklp2hs_time = (512+40*UI)/(8*UI)
+* phy_hs2lp_time = (192+64*UI)/(8*UI)
+* phy_lp2hs_time = (256+32*UI)/(8*UI)
+*/
+   timing->clk_hs2lp = DSI_PHY_DELAY(272, 136, lane_mbps);
+   timing->clk_lp2hs = DSI_PHY_DELAY(512, 40, lane_mbps);
+   timing->data_hs2lp = DSI_PHY_DELAY(192, 64, lane_mbps);
+   timing->data_lp2hs = DSI_PHY_DELAY(256, 32, lane_mbps);
 
return 0;
 }

base-commit: 35d283658a6196b2057be562096610c6793e1219
-- 
2.32.0
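
A quick numeric sanity check of the macro, assuming for illustration a lane
rate of 500 Mbps (the rate is not taken from the patch, just a convenient
round number):

/* Stand-alone userspace check of DSI_PHY_DELAY at an assumed 500 Mbps. */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define DSI_PHY_DELAY(fp, vp, mbps) \
	DIV_ROUND_UP((fp) * (mbps) + 1000 * (vp), 8000)

int main(void)
{
	unsigned int lane_mbps = 500;	/* assumed example rate */

	printf("clk_hs2lp  = %u\n", DSI_PHY_DELAY(272, 136, lane_mbps)); /* 34 */
	printf("clk_lp2hs  = %u\n", DSI_PHY_DELAY(512, 40, lane_mbps));  /* 37 */
	printf("data_hs2lp = %u\n", DSI_PHY_DELAY(192, 64, lane_mbps));  /* 20 */
	printf("data_lp2hs = %u\n", DSI_PHY_DELAY(256, 32, lane_mbps));  /* 20 */
	return 0;
}

The printed values match evaluating the four datasheet formulas by hand with
UI = 1000/500 ns, which is a reasonable smoke test of the unit handling in
the macro.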



[PATCH] video: fbdev: kyrofb: fix a DoS bug by restricting user input

2021-07-13 Thread Zheyu Ma
The user can pass in any value to the driver through the 'ioctl'
interface. The driver does not check these values, which may cause DoS bugs.

Fix this by checking whether the divisor is 0.

The following log reveals it:

divide error:  [#1] PREEMPT SMP KASAN PTI
RIP: 0010:SetOverlayViewPort+0x133/0x5f0 
drivers/video/fbdev/kyro/STG4000OverlayDevice.c:476
Call Trace:
 kyro_dev_overlay_viewport_set drivers/video/fbdev/kyro/fbdev.c:378 [inline]
 kyrofb_ioctl+0x2eb/0x330 drivers/video/fbdev/kyro/fbdev.c:603
 do_fb_ioctl+0x1f3/0x700 drivers/video/fbdev/core/fbmem.c:1171
 fb_ioctl+0xeb/0x130 drivers/video/fbdev/core/fbmem.c:1185
 vfs_ioctl fs/ioctl.c:48 [inline]
 __do_sys_ioctl fs/ioctl.c:753 [inline]
 __se_sys_ioctl fs/ioctl.c:739 [inline]
 __x64_sys_ioctl+0x19b/0x220 fs/ioctl.c:739
 do_syscall_64+0x32/0x80 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Signed-off-by: Zheyu Ma 
---
 drivers/video/fbdev/kyro/STG4000OverlayDevice.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/video/fbdev/kyro/STG4000OverlayDevice.c 
b/drivers/video/fbdev/kyro/STG4000OverlayDevice.c
index 9fde0e3b69ec..29d692fe5e75 100644
--- a/drivers/video/fbdev/kyro/STG4000OverlayDevice.c
+++ b/drivers/video/fbdev/kyro/STG4000OverlayDevice.c
@@ -407,6 +407,9 @@ int SetOverlayViewPort(volatile STG4000REG __iomem *pSTGReg,
ulVertDecFactor = 1;
}
 
+   if ((ulDest + 1) == 0)
+   return -EINVAL;
+
ulDacYScale = ((ulSrc - 1) * 2048) / (ulDest + 1);
 
tmp = STG_READ_REG(DACOverlayVtDec);/* Decimation */
@@ -471,6 +474,9 @@ int SetOverlayViewPort(volatile STG4000REG __iomem *pSTGReg,
 */
ulScaleLeft = ulSrcLeft;
 
+   if ((ulRight - ulLeft + 2) == 0)
+   return -EINVAL;
+
/* shift fxscale until it is in the range of the scaler */
ulhDecim = 0;
ulScale = (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / 
(ulRight - ulLeft + 2);
-- 
2.17.6



Re: [PATCH v2 2/2] drm/rockchip: dw_hdmi: add rk3568 support

2021-07-13 Thread Alex Bee

Hi Benjamin,

Am 07.07.21 um 14:03 schrieb Benjamin Gaignard:

Add a new dw_hdmi_plat_data struct and new compatible for rk3568.
This version of the HDMI hardware block need two clocks to provide
phy reference clock: hclk_vio and hclk.

Signed-off-by: Benjamin Gaignard 
---
version 2:
- Add the clocks needed for the phy.


If I got Alega's comment correct, it wasn't about the hclks.
It looks like for this variant there is another reference clock 
required (for the phy), like vpll already is (downstream seems to use 
HPLL ( = "HDMI-PLL" ?) for that), which also has to switch its frequency 
according to the drm mode rate - the two clocks you added here just get 
enabled (and disabled).


Alega, Andy: Is it really required to enable hclk_vio and hclk(_vop) in 
the hdmi driver? Are they required to be enabled for the other output 
variants (i.e. mipi, dsi, rgb) as well, and shouldn't they rather be 
enabled in the (not-yet existing) vop2 driver?


Overall: I'm not sure of the benefit of adding this hdmi variant for a 
SoC where the display driver isn't implemented upstream yet. The "VOP2" 
IP seems largely new and should probably be ported first (even if the 
HDMI part seems like low-hanging fruit, according to the vendor sources).


Best,
Alex
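
One more small observation on the quoted patch below (a sketch, not a
blocker): the -ENOENT special-casing around devm_clk_get() is what
devm_clk_get_optional() already provides, since it returns NULL instead of
-ENOENT when the clock is simply absent (and clk_prepare_enable(NULL) is a
no-op).  Roughly:

/* Fragment meant to replace the hclk_vio lookup in rockchip_hdmi_parse_dt(). */
hdmi->hclk_vio = devm_clk_get_optional(hdmi->dev, "hclk_vio");
if (IS_ERR(hdmi->hclk_vio)) {
	dev_err(hdmi->dev, "failed to get hclk_vio clock\n");
	return PTR_ERR(hdmi->hclk_vio);
}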



  drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 68 +
  1 file changed, 68 insertions(+)

diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c 
b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 830bdd5e9b7ce..dc0e255e45745 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -50,6 +50,10 @@
  #define RK3399_GRF_SOC_CON20  0x6250
  #define RK3399_HDMI_LCDC_SEL  BIT(6)
  
+#define RK3568_GRF_VO_CON1		0x0364

+#define RK3568_HDMI_SDAIN_MSK  BIT(15)
+#define RK3568_HDMI_SCLIN_MSK  BIT(14)
+
  #define HIWORD_UPDATE(val, mask)  (val | (mask) << 16)
  
  /**

@@ -71,6 +75,8 @@ struct rockchip_hdmi {
const struct rockchip_hdmi_chip_data *chip_data;
struct clk *vpll_clk;
struct clk *grf_clk;
+   struct clk *hclk_vio;
+   struct clk *hclk_vop;
struct dw_hdmi *hdmi;
struct phy *phy;
  };
@@ -216,6 +222,26 @@ static int rockchip_hdmi_parse_dt(struct rockchip_hdmi 
*hdmi)
return PTR_ERR(hdmi->grf_clk);
}
  
+	hdmi->hclk_vio = devm_clk_get(hdmi->dev, "hclk_vio");

+   if (PTR_ERR(hdmi->hclk_vio) == -ENOENT) {
+   hdmi->hclk_vio = NULL;
+   } else if (PTR_ERR(hdmi->hclk_vio) == -EPROBE_DEFER) {
+   return -EPROBE_DEFER;
+   } else if (IS_ERR(hdmi->hclk_vio)) {
+   dev_err(hdmi->dev, "failed to get hclk_vio clock\n");
+   return PTR_ERR(hdmi->hclk_vio);
+   }
+
+   hdmi->hclk_vop = devm_clk_get(hdmi->dev, "hclk");
+   if (PTR_ERR(hdmi->hclk_vop) == -ENOENT) {
+   hdmi->hclk_vop = NULL;
+   } else if (PTR_ERR(hdmi->hclk_vop) == -EPROBE_DEFER) {
+   return -EPROBE_DEFER;
+   } else if (IS_ERR(hdmi->hclk_vop)) {
+   dev_err(hdmi->dev, "failed to get hclk_vop clock\n");
+   return PTR_ERR(hdmi->hclk_vop);
+   }
+
return 0;
  }
  
@@ -467,6 +493,19 @@ static const struct dw_hdmi_plat_data rk3399_hdmi_drv_data = {

.use_drm_infoframe = true,
  };
  
+static struct rockchip_hdmi_chip_data rk3568_chip_data = {

+   .lcdsel_grf_reg = -1,
+};
+
+static const struct dw_hdmi_plat_data rk3568_hdmi_drv_data = {
+   .mode_valid = dw_hdmi_rockchip_mode_valid,
+   .mpll_cfg   = rockchip_mpll_cfg,
+   .cur_ctr= rockchip_cur_ctr,
+   .phy_config = rockchip_phy_config,
+   .phy_data = &rk3568_chip_data,
+   .use_drm_infoframe = true,
+};
+
  static const struct of_device_id dw_hdmi_rockchip_dt_ids[] = {
{ .compatible = "rockchip,rk3228-dw-hdmi",
  .data = &rk3228_hdmi_drv_data
@@ -480,6 +519,9 @@ static const struct of_device_id dw_hdmi_rockchip_dt_ids[] 
= {
{ .compatible = "rockchip,rk3399-dw-hdmi",
  .data = &rk3399_hdmi_drv_data
},
+   { .compatible = "rockchip,rk3568-dw-hdmi",
+ .data = &rk3568_hdmi_drv_data
+   },
{},
  };
  MODULE_DEVICE_TABLE(of, dw_hdmi_rockchip_dt_ids);
@@ -536,6 +578,28 @@ static int dw_hdmi_rockchip_bind(struct device *dev, 
struct device *master,
return ret;
}
  
+	ret = clk_prepare_enable(hdmi->hclk_vio);

+   if (ret) {
+   dev_err(hdmi->dev, "Failed to enable HDMI hclk_vio: %d\n",
+   ret);
+   return ret;
+   }
+
+   ret = clk_prepare_enable(hdmi->hclk_vop);
+   if (ret) {
+   dev_err(hdmi->dev, "Failed to enable HDMI hclk_vop: %d\n",
+   ret);
+   return ret;
+   }
+
+   if (hdmi->chip_data == &rk3568_chip_data) {
+   regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1,
+

[Bug 213715] failed to change brightness of HDR panel on AMD GREEN_SARDINE through aux

2021-07-13 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=213715

--- Comment #1 from Pengyu Ma (mapen...@gmail.com) ---
Created attachment 297821
  --> https://bugzilla.kernel.org/attachment.cgi?id=297821=edit
4k display edid

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

[Bug 213715] New: failed to change brightness of HDR panel on AMD GREEN_SARDINE through aux

2021-07-13 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=213715

Bug ID: 213715
   Summary: failed to change brightness of HDR panel on AMD
GREEN_SARDINE through aux
   Product: Drivers
   Version: 2.5
Kernel Version: 5.14.0-rc1+
  Hardware: All
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: blocking
  Priority: P1
 Component: Video(DRI - non Intel)
  Assignee: drivers_video-...@kernel-bugs.osdl.org
  Reporter: mapen...@gmail.com
Regression: No

Created attachment 297819
  --> https://bugzilla.kernel.org/attachment.cgi?id=297819=edit
dmesg

HW: ThinkPad P14s gen 2a
CPU: AMD Ryzen 7 PRO 5850U with Radeon Graphics
Panel: BOE 2434 and  CSO 5127

brightness can't be controlled through AUX.
It works with parameter "amdgpu.backlight=0"

dmesg attached.

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.

Re: [PATCH] drm/of: free the iterator object on failure

2021-07-13 Thread Steven Price
On 12/07/2021 22:55, Laurent Pinchart wrote:
> Hi Steven,

Hi Laurent,

> On Mon, Jul 12, 2021 at 10:31:52PM +0100, Steven Price wrote:
>> On 12/07/2021 17:50, Laurent Pinchart wrote:
>>> On Mon, Jul 12, 2021 at 04:57:58PM +0100, Steven Price wrote:
 When bailing out due to the sanity check the iterator value needs to be
 freed because the early return prevents for_each_child_of_node() from
 doing the dereference itself.

 Fixes: 4ee48cc5586b ("drm: of: Fix double-free bug")
>>>
>>> I don't think the Fixes tag is correct, the issue was already present
>>> before 4ee48cc5586b. The fix looks right though.
>>
>> I'm not sure quite what you mean by "already present". As I understand
>> it the timeline was:
>>
>> 1. 6529007522de drm: of: Add drm_of_lvds_get_dual_link_pixel_order
>>The function was originally added. This made the mistake twice of
>>calling of_node_put() on the wrong variable (remote_port rather than
>>endpoint).
> 
> Correct.
> 
>> 2. 4ee48cc5586b drm: of: Fix double-free bug
>>One of the of_node_put() calls was removed as it was a double-free.
>>This left the first incorrect of_node_put() in place, and the second
>>is now a straight leak.
> 
> That's right, but this commit didn't introduce the leak, it was already
> there in 6529007522de (in addition to the double-free).

Ah, I see what you mean. My thought process was that the original
commit had the bug "using the wrong variable", and (2) (partially)
fixed that but in the process introduced a new bug (a memory leak). But
I guess technically the memory leak was there from the beginning.

The other reason I referenced (2) in the Fixes line is because this
patch depends on patch (2), whereas it won't apply cleanly without.

However I don't think it really matters either way: (2) has already been
backported, and either way this needs fixing if either (1) or (2) are
present.

Would you like me to resend with a "Fixes: 6529007522de drm: of: Add
drm_of_lvds_get_dual_link_pixel_order", or are you happy to just fix
this up when merging?

Thanks,

Steve
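
For anyone hitting the same pitfall elsewhere, the general shape of it
(a generic sketch, not the drm_of.c code itself; looks_valid() is a
placeholder):

#include <linux/of.h>

/* Placeholder for whatever per-child sanity check the caller needs. */
extern bool looks_valid(struct device_node *child);

static int check_children(struct device_node *parent)
{
	struct device_node *child;

	/*
	 * for_each_child_of_node() holds a reference on "child" for the
	 * current iteration and only drops it when advancing the loop,
	 * so any early exit has to drop it by hand.
	 */
	for_each_child_of_node(parent, child) {
		if (!looks_valid(child)) {
			of_node_put(child);
			return -EINVAL;
		}
	}

	return 0;
}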

>> 3. b557a5f8da57 drm/of: free the right object
>>This (correctly) fixes the first of_node_put() to free endpoint. And
>>the post from Daniel was what caused me to look.
>>
>> 4. This patch
>>Reintroduces the of_node_put() removed in (2) but putting endpoint
>>rather than remote_port.
>>
>> I've put (2) in the Fixes line as this patch is fixing the leak
>> introduced by that patch, but that in itself was of course 'fixing' the
>> double free of the original patch.
>>
 Signed-off-by: Steven Price 
 ---
  drivers/gpu/drm/drm_of.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

 Daniel's email[1] made me take a look at this function and it appears
 that for_each_child_of_node()'s interface had caused a bad bug fix due
 to the hidden reference counting in the iterator.

 [1] https://lore.kernel.org/r/YOxQ5TbkNrqCGBDJ%40phenom.ffwll.local

 diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
 index 197c57477344..997b8827fed2 100644
 --- a/drivers/gpu/drm/drm_of.c
 +++ b/drivers/gpu/drm/drm_of.c
 @@ -331,8 +331,10 @@ static int drm_of_lvds_get_remote_pixels_type(
 * configurations by passing the endpoints explicitly to
 * drm_of_lvds_get_dual_link_pixel_order().
 */
 -  if (!current_pt || pixels_type != current_pt)
 +  if (!current_pt || pixels_type != current_pt) {
 +  of_node_put(endpoint);
return -EINVAL;
 +  }
}
  
return pixels_type;
> 



Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-13 Thread Matthew Auld
On Tue, 13 Jul 2021 at 16:55, Ville Syrjälä
 wrote:
>
> On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> > + /**
> > +  * @cache_coherent:
> > +  *
> > +  * Track whether the pages are coherent with the GPU if reading or
> > +  * writing through the CPU cache.
> > +  *
> > +  * This largely depends on the @cache_level, for example if the object
> > +  * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> > +  * reads and writes through the CPU cache.
> > +  *
> > +  * Note that on platforms with shared-LLC support(HAS_LLC) reads 
> > through
> > +  * the CPU cache are always coherent, regardless of the @cache_level. 
> > On
> > +  * snooping based platforms this is not the case, unless the full
> > +  * I915_CACHE_LLC or similar setting is used.
> > +  *
> > +  * As a result of this we need to track coherency separately for reads
> > +  * and writes, in order to avoid superfluous flushing on shared-LLC
> > +  * platforms, for reads.
> > +  *
> > +  * I915_BO_CACHE_COHERENT_FOR_READ:
> > +  *
> > +  * When reading through the CPU cache, the GPU is still coherent. Note
> > +  * that no data has actually been modified here, so it might seem
> > +  * strange that we care about this.
> > +  *
> > +  * As an example, if some object is mapped on the CPU with write-back
> > +  * caching, and we read some page, then the cache likely now contains
> > +  * the data from that read. At this point the cache and main memory
> > +  * match up, so all good. But next the GPU needs to write some data to
> > +  * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> > +  * platform doesn't have the shared-LLC, then the GPU will
> > +  * effectively skip invalidating the cache(or however that works
> > +  * internally) when writing the new value.  This is really bad since 
> > the
> > +  * GPU has just written some new data to main memory, but the CPU 
> > cache
> > +  * is still valid and now contains stale data. As a result the next 
> > time
> > +  * we do a cached read with the CPU, we are rewarded with stale data.
> > +  * Likewise if the cache is later flushed, we might be rewarded with
> > +  * overwriting main memory with stale data.
> > +  *
> > +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> > +  *
> > +  * When writing through the CPU cache, the GPU is still coherent. Note
> > +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> > +  *
> > +  * This is never set when I915_CACHE_NONE is used for @cache_level,
> > +  * where instead we have to manually flush the caches after writing
> > +  * through the CPU cache. For other cache levels this should be set 
> > and
> > +  * the object is therefore considered coherent for both reads and 
> > writes
> > +  * through the CPU cache.
>
> I don't remember why we have this read vs. write split and this new
> documentation doesn't seem to really explain it either.

Hmm, I attempted to explain that earlier:

* Note that on platforms with shared-LLC support(HAS_LLC) reads through
* the CPU cache are always coherent, regardless of the @cache_level. On
* snooping based platforms this is not the case, unless the full
* I915_CACHE_LLC or similar setting is used.
*
* As a result of this we need to track coherency separately for reads
* and writes, in order to avoid superfluous flushing on shared-LLC
* platforms, for reads.

So AFAIK it's just because shared-LLC can be coherent for reads, while
also not being coherent for writes (CACHE_NONE), so being able to track
each separately is kind of needed to avoid unnecessary flushing for
the read cases, i.e. a simple boolean for coherent vs non-coherent is not
enough.

I can try to reword things to make that more clear.

>
> Is it for optimizing some display related case where we can omit the
> invalidates but still have to do the writeback to keep the display
> engine happy?
>
> --
> Ville Syrjälä
> Intel


Re: [PATCH v4 02/18] drm/sched: Barriers are needed for entity->last_scheduled

2021-07-13 Thread Andrey Grodzovsky



On 2021-07-13 5:10 a.m., Daniel Vetter wrote:

On Tue, Jul 13, 2021 at 9:25 AM Christian König
 wrote:

Am 13.07.21 um 08:50 schrieb Daniel Vetter:

On Tue, Jul 13, 2021 at 8:35 AM Christian König
 wrote:

Am 12.07.21 um 19:53 schrieb Daniel Vetter:

It might be good enough on x86 with just READ_ONCE, but the write side
should then at least be WRITE_ONCE because x86 has total store order.

It's definitely not enough on arm.

Fix this properly, which means
- explain the need for the barrier in both places
- point at the other side in each comment

Also pull out the !sched_list case as the first check, so that the
code flow is clearer.

While at it sprinkle some comments around because it was very
non-obvious to me what's actually going on here and why.

Note that we really need full barriers here, at first I thought
store-release and load-acquire on ->last_scheduled would be enough,
but we actually require ordering between that and the queue state.

v2: Put smp_rmb() in the right place and fix up comment (Andrey)

Signed-off-by: Daniel Vetter 
Cc: "Christian König" 
Cc: Steven Price 
Cc: Daniel Vetter 
Cc: Andrey Grodzovsky 
Cc: Lee Jones 
Cc: Boris Brezillon 
---
drivers/gpu/drm/scheduler/sched_entity.c | 27 ++--
1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index f7347c284886..89e3f6eaf519 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -439,8 +439,16 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct 
drm_sched_entity *entity)
dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);

dma_fence_put(entity->last_scheduled);
+
entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);

+ /*
+  * If the queue is empty we allow drm_sched_entity_select_rq() to
+  * locklessly access ->last_scheduled. This only works if we set the
+  * pointer before we dequeue and if we add a write barrier here.
+  */
+ smp_wmb();
+

Again, conceptual those barriers should be part of the spsc_queue
container and not externally.

That would be extremely unusual api. Let's assume that your queue is
very dumb, and protected by a simple lock. That's about the maximum
any user could expect.

But then you still need barriers here, because linux locks (spinlock,
mutex) are defined to be one-way barriers: Stuff that's inside is
guaranteed to be done inside, but stuff outside of the locked region
can leak in. They're load-acquire/store-release barriers. So not good
enough.

You really need to have barriers here, and they really all need to be
documented properly. And yes that's a shit-ton of work in drm/sched,
because it's full of yolo lockless stuff.

The other case you could make is that this works like a wakeup queue,
or similar. The rules there are:
- wake_up (i.e. pushing something into the queue) is a store-release barrier
- the waked up (i.e. popping an entry) is a load acquire barrier
Which is obviously needed because otherwise you don't have coherency
for the data queued up. And again not the barriers you're looking for
here.

Exactly that was the idea, yes.


Either way, we'd still need the comments, because it's still lockless
trickery, and every single one of that needs to have a comment on both
sides to explain what's going on.

Essentially replace spsc_queue with an llist underneath, and that's
the amount of barriers a data structure should provide. Anything else
is asking your datastructure to paper over bugs in your users.

This is similar to how atomic_t is by default completely unordered,
and users need to add barriers as needed, with comments.

My main problem is as always that kernel atomics work different than
userspace atomics.


I think this is all to make sure people don't just write lockless algorithms
because it's a cool idea, but are forced to think this all through.
Which seems to not have happened very consistently for drm/sched, so I
guess needs to be fixed.

Well at least initially that was all perfectly thought through. The
problem is nobody is really maintaining that stuff.


I'm definitely not going to hide all that by making the spsc_queue
stuff provide random unjustified barriers just because that would
paper over drm/sched bugs. We need to fix the actual bugs, and
preferably all of them. I've found a few, but I wasn't involved in
drm/sched thus far, so best I can do is discover them as we go.

I don't think that those are random unjustified barriers at all and it
sounds like you didn't grasp what I said here.

See the spsc queue must have the following semantics:

1. When you pop a job all changes made before you push the job must be
visible.

This is the standard barriers that also wake-up queues have, it's just
store-release+load-acquire.


2. When the queue becomes empty all the changes made before you pop the
last job must be visible.

This 

Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-13 Thread Ville Syrjälä
On Tue, Jul 13, 2021 at 11:45:50AM +0100, Matthew Auld wrote:
> + /**
> +  * @cache_coherent:
> +  *
> +  * Track whether the pages are coherent with the GPU if reading or
> +  * writing through the CPU cache.
> +  *
> +  * This largely depends on the @cache_level, for example if the object
> +  * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> +  * reads and writes through the CPU cache.
> +  *
> +  * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> +  * the CPU cache are always coherent, regardless of the @cache_level. On
> +  * snooping based platforms this is not the case, unless the full
> +  * I915_CACHE_LLC or similar setting is used.
> +  *
> +  * As a result of this we need to track coherency separately for reads
> +  * and writes, in order to avoid superfluous flushing on shared-LLC
> +  * platforms, for reads.
> +  *
> +  * I915_BO_CACHE_COHERENT_FOR_READ:
> +  *
> +  * When reading through the CPU cache, the GPU is still coherent. Note
> +  * that no data has actually been modified here, so it might seem
> +  * strange that we care about this.
> +  *
> +  * As an example, if some object is mapped on the CPU with write-back
> +  * caching, and we read some page, then the cache likely now contains
> +  * the data from that read. At this point the cache and main memory
> +  * match up, so all good. But next the GPU needs to write some data to
> +  * that same page. Now if the @cache_level is I915_CACHE_NONE and the
> +  * platform doesn't have the shared-LLC, then the GPU will
> +  * effectively skip invalidating the cache(or however that works
> +  * internally) when writing the new value.  This is really bad since the
> +  * GPU has just written some new data to main memory, but the CPU cache
> +  * is still valid and now contains stale data. As a result the next time
> +  * we do a cached read with the CPU, we are rewarded with stale data.
> +  * Likewise if the cache is later flushed, we might be rewarded with
> +  * overwriting main memory with stale data.
> +  *
> +  * I915_BO_CACHE_COHERENT_FOR_WRITE:
> +  *
> +  * When writing through the CPU cache, the GPU is still coherent. Note
> +  * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
> +  *
> +  * This is never set when I915_CACHE_NONE is used for @cache_level,
> +  * where instead we have to manually flush the caches after writing
> +  * through the CPU cache. For other cache levels this should be set and
> +  * the object is therefore considered coherent for both reads and writes
> +  * through the CPU cache.

I don't remember why we have this read vs. write split and this new
documentation doesn't seem to really explain it either.

Is it for optimizing some display related case where we can omit the
invalidates but still have to do the writeback to keep the display
engine happy?

-- 
Ville Syrjälä
Intel


[PATCH v2 7/7] drm/msm/dp: retrain link when loss of symbol lock detected

2021-07-13 Thread Kuogee Hsieh
Main link symbol lock is achieved at the end of link training 2. On some
dongles the main link symbols may become unlocked again if the host does
not end link training soon enough after completing link training 2. The
host has to retrain the main link if loss of symbol lock is detected
before ending link training, so that the upcoming video stream can be
transmitted to the sink properly.

Signed-off-by: Kuogee Hsieh 
---
 drivers/gpu/drm/msm/dp/dp_ctrl.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index 6a013b0..20951c8 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -1638,6 +1638,25 @@ static bool dp_ctrl_clock_recovery_any_ok(
return drm_dp_clock_recovery_ok(link_status, lane_count);
 }
 
+static bool dp_ctrl_loss_symbol_lock(struct dp_ctrl_private *ctrl)
+{
+   u8 link_status[DP_LINK_STATUS_SIZE];
+   u8 status;
+   int i;
+   int num_lanes = ctrl->link->link_params.num_lanes;
+
+   dp_ctrl_read_link_status(ctrl, link_status);
+
+   for (i = 0; i < num_lanes; i++) {
+   status = link_status[i / 2];
+   status >>= ((i % 2) * 4);
+   if (!(status & DP_LANE_SYMBOL_LOCKED))
+   return true;
+   }
+
+   return false;
+}
+
 int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
 {
int rc = 0;
@@ -1761,6 +1780,13 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
return rc;
 }
 
+static int dp_ctrl_link_retrain(struct dp_ctrl_private *ctrl)
+{
+   int training_step = DP_TRAINING_NONE;
+
return dp_ctrl_setup_main_link(ctrl, &training_step);
+}
+
 int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
 {
int ret = 0;
@@ -1786,6 +1812,9 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
}
}
 
+   if (dp_ctrl_loss_symbol_lock(ctrl))
+   dp_ctrl_link_retrain(ctrl);
+
/* stop txing train pattern to end link training */
dp_ctrl_clear_training_pattern(ctrl);
 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
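
A note for reviewers (sketch only, with a behavioral caveat):
drm_dp_channel_eq_ok() in the DP helpers walks the same per-lane status
nibbles, but it is stricter than a pure symbol-lock check because it also
requires clock recovery, channel EQ and interlane alignment.  If that
stricter condition is acceptable here, the open-coded loop could shrink to
roughly:

/* Sketch only -- stricter than testing DP_LANE_SYMBOL_LOCKED alone. */
static bool dp_ctrl_loss_symbol_lock(struct dp_ctrl_private *ctrl)
{
	u8 link_status[DP_LINK_STATUS_SIZE];

	dp_ctrl_read_link_status(ctrl, link_status);

	return !drm_dp_channel_eq_ok(link_status,
				     ctrl->link->link_params.num_lanes);
}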



[PATCH v2 6/7] drm/msm/dp: do not end dp link training until video is ready

2021-07-13 Thread Kuogee Hsieh
Initialize both the pre-emphasis and voltage swing levels to 0 before
starting link training, and do not end link training until video is
ready, to reduce the period between the end of link training and video
start and meet the Link Layer CTS requirement.  This fixes Link Layer
CTS cases 4.3.2.1, 4.3.2.2, 4.3.2.3 and 4.3.2.4.

Signed-off-by: Kuogee Hsieh 
---
 drivers/gpu/drm/msm/dp/dp_ctrl.c | 36 +---
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index 92cf331..6a013b0 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -1484,6 +1484,9 @@ static int dp_ctrl_link_maintenance(struct 
dp_ctrl_private *ctrl)
 
dp_ctrl_push_idle(&ctrl->dp_ctrl);
 
+   ctrl->link->phy_params.p_level = 0;
+   ctrl->link->phy_params.v_level = 0;
+
ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock;
 
ret = dp_ctrl_setup_main_link(ctrl, &training_step);
@@ -1670,6 +1673,9 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
ctrl->link->link_params.rate,
ctrl->link->link_params.num_lanes, ctrl->dp_ctrl.pixel_rate);
 
+   ctrl->link->phy_params.p_level = 0;
+   ctrl->link->phy_params.v_level = 0;
+
rc = dp_ctrl_enable_mainlink_clocks(ctrl);
if (rc)
return rc;
@@ -1735,17 +1741,19 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN)
return rc;
 
-   /* stop txing train pattern */
-   dp_ctrl_clear_training_pattern(ctrl);
+   if (rc == 0) {  /* link train successfully */
+   /*
+* do not stop train pattern here
+* stop link training at on_stream
+* to pass compliance test
+*/
+   } else  {
+   /*
+* link training failed
+* end txing train pattern here
+*/
+   dp_ctrl_clear_training_pattern(ctrl);
 
-   /*
-* keep transmitting idle pattern until video ready
-* to avoid main link from loss of sync
-*/
-   if (rc == 0)  /* link train successfully */
-   dp_ctrl_push_idle(dp_ctrl);
-   else  {
-   /* link training failed */
dp_ctrl_deinitialize_mainlink(ctrl);
rc = -ECONNRESET;
}
@@ -1755,7 +1763,6 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl)
 
 int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
 {
-   u32 rate = 0;
int ret = 0;
bool mainlink_ready = false;
struct dp_ctrl_private *ctrl;
@@ -1765,10 +1772,6 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
 
ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl);
 
-   rate = ctrl->panel->link_info.rate;
-
-   ctrl->link->link_params.rate = rate;
-   ctrl->link->link_params.num_lanes = ctrl->panel->link_info.num_lanes;
ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock;
 
DRM_DEBUG_DP("rate=%d, num_lanes=%d, pixel_rate=%d\n",
@@ -1783,6 +1786,9 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
}
}
 
+   /* stop txing train pattern to end link training */
+   dp_ctrl_clear_training_pattern(ctrl);
+
ret = dp_ctrl_enable_stream_clocks(ctrl);
if (ret) {
DRM_ERROR("Failed to start pixel clocks. ret=%d\n", ret);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 5/7] drm/msm/dp: return correct edid checksum after corrupted edid checksum read

2021-07-13 Thread Kuogee Hsieh
Respond with the correct EDID checksum saved at the connector after a
corrupted EDID checksum read. This fixes Link Layer CTS cases 4.2.2.3
and 4.2.2.6.

Signed-off-by: Kuogee Hsieh 
---
 drivers/gpu/drm/msm/dp/dp_panel.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c 
b/drivers/gpu/drm/msm/dp/dp_panel.c
index 88196f7..0fdb551 100644
--- a/drivers/gpu/drm/msm/dp/dp_panel.c
+++ b/drivers/gpu/drm/msm/dp/dp_panel.c
@@ -271,7 +271,7 @@ static u8 dp_panel_get_edid_checksum(struct edid *edid)
 {
struct edid *last_block;
u8 *raw_edid;
-   bool is_edid_corrupt;
+   bool is_edid_corrupt = false;
 
if (!edid) {
DRM_ERROR("invalid edid input\n");
@@ -303,7 +303,12 @@ void dp_panel_handle_sink_request(struct dp_panel 
*dp_panel)
panel = container_of(dp_panel, struct dp_panel_private, dp_panel);
 
if (panel->link->sink_request & DP_TEST_LINK_EDID_READ) {
-   u8 checksum = dp_panel_get_edid_checksum(dp_panel->edid);
+   u8 checksum;
+
+   if (dp_panel->edid)
+   checksum = dp_panel_get_edid_checksum(dp_panel->edid);
+   else
+   checksum = dp_panel->connector->real_edid_checksum;
 
dp_link_send_edid_checksum(panel->link, checksum);
dp_link_send_test_response(panel->link);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 3/7] drm/msm/dp: reset aux controller after dp_aux_cmd_fifo_tx() failed.

2021-07-13 Thread Kuogee Hsieh
The AUX hardware calibration sequence requires resetting the AUX
controller in order for the new settings to take effect. However,
resetting the AUX controller also clears the HPD interrupt status, which
may accidentally cause a pending unplug interrupt to get lost. Therefore
reset the AUX controller only when the link is in the connected state and
dp_aux_cmd_fifo_tx() fails. This fixes Link Layer CTS cases 4.2.1.1 and
4.2.1.2.

Signed-off-by: Kuogee Hsieh 
---
 drivers/gpu/drm/msm/dp/dp_aux.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c
index 4a3293b..eb40d84 100644
--- a/drivers/gpu/drm/msm/dp/dp_aux.c
+++ b/drivers/gpu/drm/msm/dp/dp_aux.c
@@ -353,6 +353,9 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
if (!(aux->retry_cnt % MAX_AUX_RETRIES))
dp_catalog_aux_update_cfg(aux->catalog);
}
+   /* reset aux if link is in connected state */
+   if (dp_catalog_link_is_connected(aux->catalog))
+   dp_catalog_aux_reset(aux->catalog);
} else {
aux->retry_cnt = 0;
switch (aux->aux_error_num) {
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 4/7] drm/msm/dp: replug event is converted into an unplug followed by an plug events

2021-07-13 Thread Kuogee Hsieh
Remove the special handling of the replug interrupt and instead treat a
replug event as a sequential unplug followed by a plug event. This is
needed to meet the requirements of DP Link Layer CTS test case 4.2.1.3.

Changes in V2:
-- add fixes statement

Fixes: f21c8a276c2d ("drm/msm/dp: handle irq_hpd with sink_count = 0 correctly")

Signed-off-by: Kuogee Hsieh 
---
 drivers/gpu/drm/msm/dp/dp_display.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c 
b/drivers/gpu/drm/msm/dp/dp_display.c
index 78c5301..d089ada 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -1146,9 +1146,6 @@ static int hpd_event_thread(void *data)
case EV_IRQ_HPD_INT:
dp_irq_hpd_handle(dp_priv, todo->data);
break;
-   case EV_HPD_REPLUG_INT:
-   /* do nothing */
-   break;
case EV_USER_NOTIFICATION:
dp_display_send_hpd_notification(dp_priv,
todo->data);
@@ -1192,10 +1189,8 @@ static irqreturn_t dp_display_irq_handler(int irq, void 
*dev_id)
 
if (hpd_isr_status & 0x0F) {
/* hpd related interrupts */
-   if (hpd_isr_status & DP_DP_HPD_PLUG_INT_MASK ||
-   hpd_isr_status & DP_DP_HPD_REPLUG_INT_MASK) {
+   if (hpd_isr_status & DP_DP_HPD_PLUG_INT_MASK)
dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0);
-   }
 
if (hpd_isr_status & DP_DP_IRQ_HPD_INT_MASK) {
/* stop sentinel connect pending checking */
@@ -1203,8 +1198,10 @@ static irqreturn_t dp_display_irq_handler(int irq, void 
*dev_id)
dp_add_event(dp, EV_IRQ_HPD_INT, 0, 0);
}
 
-   if (hpd_isr_status & DP_DP_HPD_REPLUG_INT_MASK)
-   dp_add_event(dp, EV_HPD_REPLUG_INT, 0, 0);
+   if (hpd_isr_status & DP_DP_HPD_REPLUG_INT_MASK) {
+   dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0);
+   dp_add_event(dp, EV_HPD_PLUG_INT, 0, 3);
+   }
 
if (hpd_isr_status & DP_DP_HPD_UNPLUG_INT_MASK)
dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 2/7] drm/msm/dp: reduce link rate if failed at link training 1

2021-07-13 Thread Kuogee Hsieh
Reduce the link rate and restart link training if link training 1 failed
due to loss of clock recovery done, to fix Link Layer CTS case 4.3.1.7.
Also, only update the voltage swing and pre-emphasis levels after link
training has started, to fix Link Layer CTS case 4.3.1.6.
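
For illustration, the down-shift on clock-recovery failure amounts to
stepping through the standard DP link rates. A hypothetical sketch of
that ladder using the drm_dp_helper.h bandwidth codes (not the driver's
actual helper):

static int lower_link_bw_code(u8 bw_code)
{
	switch (bw_code) {
	case DP_LINK_BW_5_4:		/* HBR2 -> HBR */
		return DP_LINK_BW_2_7;
	case DP_LINK_BW_2_7:		/* HBR -> RBR */
		return DP_LINK_BW_1_62;
	default:
		return -EINVAL;		/* already at the lowest rate, give up */
	}
}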

Changes in V2:
-- replaced cr_status with link_status[DP_LINK_STATUS_SIZE]
-- replaced dp_ctrl_any_lane_cr_done() with dp_ctrl_clock_recovery_any_ok()
-- replaced dp_ctrl_any_lane_cr_lose() with !drm_dp_clock_recovery_ok()

Signed-off-by: Kuogee Hsieh 
---
 drivers/gpu/drm/msm/dp/dp_ctrl.c | 78 ++--
 1 file changed, 44 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index 27fb0f0..92cf331 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -83,13 +83,6 @@ struct dp_ctrl_private {
struct completion video_comp;
 };
 
-struct dp_cr_status {
-   u8 lane_0_1;
-   u8 lane_2_3;
-};
-
-#define DP_LANE0_1_CR_DONE 0x11
-
 static int dp_aux_link_configure(struct drm_dp_aux *aux,
struct dp_link_info *link)
 {
@@ -1080,7 +1073,7 @@ static int dp_ctrl_read_link_status(struct 
dp_ctrl_private *ctrl,
 }
 
 static int dp_ctrl_link_train_1(struct dp_ctrl_private *ctrl,
-   struct dp_cr_status *cr, int *training_step)
+   int *training_step)
 {
int tries, old_v_level, ret = 0;
u8 link_status[DP_LINK_STATUS_SIZE];
@@ -1109,9 +1102,6 @@ static int dp_ctrl_link_train_1(struct dp_ctrl_private 
*ctrl,
if (ret)
return ret;
 
-   cr->lane_0_1 = link_status[0];
-   cr->lane_2_3 = link_status[1];
-
if (drm_dp_clock_recovery_ok(link_status,
ctrl->link->link_params.num_lanes)) {
return 0;
@@ -1188,7 +1178,7 @@ static void dp_ctrl_clear_training_pattern(struct 
dp_ctrl_private *ctrl)
 }
 
 static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl,
-   struct dp_cr_status *cr, int *training_step)
+   int *training_step)
 {
int tries = 0, ret = 0;
char pattern;
@@ -1204,10 +1194,6 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private 
*ctrl,
else
pattern = DP_TRAINING_PATTERN_2;
 
-   ret = dp_ctrl_update_vx_px(ctrl);
-   if (ret)
-   return ret;
-
ret = dp_catalog_ctrl_set_pattern(ctrl->catalog, pattern);
if (ret)
return ret;
@@ -1220,8 +1206,6 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private 
*ctrl,
ret = dp_ctrl_read_link_status(ctrl, link_status);
if (ret)
return ret;
-   cr->lane_0_1 = link_status[0];
-   cr->lane_2_3 = link_status[1];
 
if (drm_dp_channel_eq_ok(link_status,
ctrl->link->link_params.num_lanes)) {
@@ -1241,7 +1225,7 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private 
*ctrl,
 static int dp_ctrl_reinitialize_mainlink(struct dp_ctrl_private *ctrl);
 
 static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl,
-   struct dp_cr_status *cr, int *training_step)
+   int *training_step)
 {
int ret = 0;
u8 encoding = DP_SET_ANSI_8B10B;
@@ -1257,7 +1241,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private 
*ctrl,
drm_dp_dpcd_write(ctrl->aux, DP_MAIN_LINK_CHANNEL_CODING_SET,
&encoding, 1);
 
-   ret = dp_ctrl_link_train_1(ctrl, cr, training_step);
+   ret = dp_ctrl_link_train_1(ctrl, training_step);
if (ret) {
DRM_ERROR("link training #1 failed. ret=%d\n", ret);
goto end;
@@ -1266,7 +1250,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private 
*ctrl,
/* print success info as this is a result of user initiated action */
DRM_DEBUG_DP("link training #1 successful\n");
 
-   ret = dp_ctrl_link_train_2(ctrl, cr, training_step);
+   ret = dp_ctrl_link_train_2(ctrl, training_step);
if (ret) {
DRM_ERROR("link training #2 failed. ret=%d\n", ret);
goto end;
@@ -1282,7 +1266,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private 
*ctrl,
 }
 
 static int dp_ctrl_setup_main_link(struct dp_ctrl_private *ctrl,
-   struct dp_cr_status *cr, int *training_step)
+   int *training_step)
 {
int ret = 0;
 
@@ -1297,7 +1281,7 @@ static int dp_ctrl_setup_main_link(struct dp_ctrl_private 
*ctrl,
 * a link training pattern, we have to first do soft reset.
 */
 
-   ret = dp_ctrl_link_train(ctrl, cr, training_step);
+   ret = dp_ctrl_link_train(ctrl, training_step);
 
return ret;
 }
@@ -1496,14 +1480,13 @@ static int dp_ctrl_deinitialize_mainlink(struct 
dp_ctrl_private *ctrl)
 static int 

[PATCH v2 1/7] drm/msm/dp: use dp_ctrl_off_link_stream during PHY compliance test run

2021-07-13 Thread Kuogee Hsieh
The DP cable should stay connected to the DPU during the entire PHY
compliance test run. Since the DP PHY compliance test is executed in
irq_hpd event context, dp_ctrl_off_link_stream() should be used instead
of dp_ctrl_off(). dp_ctrl_off() is used for the unplug event, which is
triggered when the DP cable is disconnected.

Changes in V2:
-- add fixes statement

Fixes: f21c8a276c2d ("drm/msm/dp: handle irq_hpd with sink_count = 0 correctly")

Signed-off-by: Kuogee Hsieh 
---
 drivers/gpu/drm/msm/dp/dp_ctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index caf71fa..27fb0f0 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -1530,7 +1530,7 @@ static int dp_ctrl_process_phy_test_request(struct 
dp_ctrl_private *ctrl)
 * running. Add the global reset just before disabling the
 * link clocks and core clocks.
 */
-   ret = dp_ctrl_off(&ctrl->dp_ctrl);
+   ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl);
if (ret) {
DRM_ERROR("failed to disable DP controller\n");
return ret;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 0/7] add fixes to pass DP Link Layer compliance test cases

2021-07-13 Thread Kuogee Hsieh
add fixes to pass DP Link Layer compliance test cases

Kuogee Hsieh (7):
  drm/msm/dp: use dp_ctrl_off_link_stream during PHY compliance test run
  drm/msm/dp: reduce link rate if failed at link training 1
  drm/msm/dp: reset aux controller after dp_aux_cmd_fifo_tx() failed.
  drm/msm/dp: replug event is converted into an unplug followed by an
plug events
  drm/msm/dp: return correct edid checksum after corrupted edid checksum
read
  drm/msm/dp: do not end dp link training until video is ready
  drm/msm/dp: retrain link when loss of symbol lock detected

 drivers/gpu/drm/msm/dp/dp_aux.c |   3 +
 drivers/gpu/drm/msm/dp/dp_ctrl.c| 145 +++-
 drivers/gpu/drm/msm/dp/dp_display.c |  13 ++--
 drivers/gpu/drm/msm/dp/dp_panel.c   |   9 ++-
 4 files changed, 110 insertions(+), 60 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



Re: [PATCH v3] backlight: ktd253: Stabilize backlight

2021-07-13 Thread Linus Walleij
On Fri, Jun 4, 2021 at 8:34 AM Linus Walleij  wrote:

> Remove interrupt disablement during backlight setting. It is
> way too dangerous and makes platforms unstable by having them
> miss vblank IRQs, leading to the graphics derailing.
>
> The code is using ndelay() which is not available on
> platforms such as ARM and will result in 32 * udelay(1)
> which is substantial.
>
> Add some code to detect if an interrupt occurs during the
> tight loop and in that case just redo it from the top.
>
> Fixes: 5317f37e48b9 ("backlight: Add Kinetic KTD253 backlight driver")
> Cc: Stephan Gerhold 
> Reported-by: newb...@disroot.org
> Signed-off-by: Linus Walleij 
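
For context, a rough sketch of the retry-on-disturbance idea described in
the quoted message; the helper name, retry bound and timing constants are
hypothetical, not the actual ktd253 code:

static void ktd253_send_pulses(struct gpio_desc *gpiod, int pulses,
			       s64 budget_us)
{
	int tries = 5;		/* arbitrary bound for the sketch */
	ktime_t start;
	int i;

	do {
		start = ktime_get();
		for (i = 0; i < pulses; i++) {
			gpiod_set_value(gpiod, 0);
			ndelay(PULSE_LOW_NS);	/* assumed timing constant */
			gpiod_set_value(gpiod, 1);
			ndelay(PULSE_HIGH_NS);	/* assumed timing constant */
		}
		/* An IRQ that stretched the train past the window means: redo it. */
	} while (ktime_us_delta(ktime_get(), start) > budget_us && --tries);
}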

Hm it seems this patch did not make it into v5.14-rc1, could it be applied
as a fix for the -rc:s?

Shall I resend it with Daniel's ACK?

Yours,
Linus Walleij


Re: [PATCH v3 1/1] drm/ttm: Fix COW check

2021-07-13 Thread Felix Kuehling
Am 2021-07-13 um 2:57 a.m. schrieb Christian König:
>
>
> Am 13.07.21 um 00:06 schrieb Felix Kuehling:
>> KFD Thunk maps invisible VRAM BOs with PROT_NONE, MAP_PRIVATE.
>> is_cow_mapping returns true for these mappings. Add a check for
>> vm_flags & VM_WRITE to avoid mmap failures on private read-only or
>> PROT_NONE mappings.
>>
>> v2: protect against mprotect making a mapping writable after the fact
>> v3: update driver-specific vm_operations_structs
>>
>> Fixes: f91142c62161 ("drm/ttm: nuke VM_MIXEDMAP on BO mappings v3")
>> Signed-off-by: Felix Kuehling 
>> Signed-off-by: Alex Deucher 
>
> Reviewed-by: Christian König 

Thank you!

Alex, this patch is against your updated amd-staging-drm-next branch.
Please replace my previous version of the patch with this one.

Thanks,
  Felix


>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  |  3 ++-
>>   drivers/gpu/drm/nouveau/nouveau_gem.c    |  3 ++-
>>   drivers/gpu/drm/radeon/radeon_gem.c  |  3 ++-
>>   drivers/gpu/drm/ttm/ttm_bo_vm.c  | 14 +-
>>   drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c |  1 +
>>   include/drm/ttm/ttm_bo_api.h |  4 
>>   6 files changed, 24 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index b3404c43a911..1aa750a6a5d2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -79,7 +79,8 @@ static const struct vm_operations_struct
>> amdgpu_gem_vm_ops = {
>>   .fault = amdgpu_gem_fault,
>>   .open = ttm_bo_vm_open,
>>   .close = ttm_bo_vm_close,
>> -    .access = ttm_bo_vm_access
>> +    .access = ttm_bo_vm_access,
>> +    .mprotect = ttm_bo_vm_mprotect
>>   };
>>     static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c
>> b/drivers/gpu/drm/nouveau/nouveau_gem.c
>> index 5b27845075a1..164ea564bb7a 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
>> @@ -70,7 +70,8 @@ static const struct vm_operations_struct
>> nouveau_ttm_vm_ops = {
>>   .fault = nouveau_ttm_fault,
>>   .open = ttm_bo_vm_open,
>>   .close = ttm_bo_vm_close,
>> -    .access = ttm_bo_vm_access
>> +    .access = ttm_bo_vm_access,
>> +    .mprotect = ttm_bo_vm_mprotect
>>   };
>>     void
>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c
>> b/drivers/gpu/drm/radeon/radeon_gem.c
>> index 458f92a70887..c19ad07eb7b5 100644
>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>> @@ -77,7 +77,8 @@ static const struct vm_operations_struct
>> radeon_gem_vm_ops = {
>>   .fault = radeon_gem_fault,
>>   .open = ttm_bo_vm_open,
>>   .close = ttm_bo_vm_close,
>> -    .access = ttm_bo_vm_access
>> +    .access = ttm_bo_vm_access,
>> +    .mprotect = ttm_bo_vm_mprotect
>>   };
>>     static void radeon_gem_object_free(struct drm_gem_object *gobj)
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> index f56be5bc0861..fb325bad5db6 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> @@ -542,17 +542,29 @@ int ttm_bo_vm_access(struct vm_area_struct
>> *vma, unsigned long addr,
>>   }
>>   EXPORT_SYMBOL(ttm_bo_vm_access);
>>   +int ttm_bo_vm_mprotect(struct vm_area_struct *vma, unsigned long
>> start,
>> +   unsigned long end, unsigned long newflags)
>> +{
>> +    /* Enforce no COW since would have really strange behavior with
>> it. */
>> +    if (is_cow_mapping(newflags) && (newflags & VM_WRITE))
>> +    return -EINVAL;
>> +
>> +    return 0;
>> +}
>> +EXPORT_SYMBOL(ttm_bo_vm_mprotect);
>> +
>>   static const struct vm_operations_struct ttm_bo_vm_ops = {
>>   .fault = ttm_bo_vm_fault,
>>   .open = ttm_bo_vm_open,
>>   .close = ttm_bo_vm_close,
>>   .access = ttm_bo_vm_access,
>> +    .mprotect = ttm_bo_vm_mprotect,
>>   };
>>     int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct
>> ttm_buffer_object *bo)
>>   {
>>   /* Enforce no COW since would have really strange behavior with
>> it. */
>> -    if (is_cow_mapping(vma->vm_flags))
>> +    if (is_cow_mapping(vma->vm_flags) && (vma->vm_flags & VM_WRITE))
>>   return -EINVAL;
>>     ttm_bo_get(bo);
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
>> b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
>> index e6b1f98ec99f..e4bf7dc99320 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
>> @@ -61,6 +61,7 @@ int vmw_mmap(struct file *filp, struct
>> vm_area_struct *vma)
>>   .fault = vmw_bo_vm_fault,
>>   .open = ttm_bo_vm_open,
>>   .close = ttm_bo_vm_close,
>> +    .mprotect = ttm_bo_vm_mprotect,
>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>   .huge_fault = vmw_bo_vm_huge_fault,
>>   #endif
>> diff --git a/include/drm/ttm/ttm_bo_api.h 

Re: [PATCH v3 1/1] drm/ttm: Fix COW check

2021-07-13 Thread Alex Deucher
On Tue, Jul 13, 2021 at 2:57 AM Christian König
 wrote:
>
>
>
> Am 13.07.21 um 00:06 schrieb Felix Kuehling:
> > KFD Thunk maps invisible VRAM BOs with PROT_NONE, MAP_PRIVATE.
> > is_cow_mapping returns true for these mappings. Add a check for
> > vm_flags & VM_WRITE to avoid mmap failures on private read-only or
> > PROT_NONE mappings.
> >
> > v2: protect against mprotect making a mapping writable after the fact
> > v3: update driver-specific vm_operations_structs
> >
> > Fixes: f91142c62161 ("drm/ttm: nuke VM_MIXEDMAP on BO mappings v3")
> > Signed-off-by: Felix Kuehling 
> > Signed-off-by: Alex Deucher 
>
> Reviewed-by: Christian König 

Are you planning to push this to drm-misc?

Alex

>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  |  3 ++-
> >   drivers/gpu/drm/nouveau/nouveau_gem.c|  3 ++-
> >   drivers/gpu/drm/radeon/radeon_gem.c  |  3 ++-
> >   drivers/gpu/drm/ttm/ttm_bo_vm.c  | 14 +-
> >   drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c |  1 +
> >   include/drm/ttm/ttm_bo_api.h |  4 
> >   6 files changed, 24 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > index b3404c43a911..1aa750a6a5d2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> > @@ -79,7 +79,8 @@ static const struct vm_operations_struct 
> > amdgpu_gem_vm_ops = {
> >   .fault = amdgpu_gem_fault,
> >   .open = ttm_bo_vm_open,
> >   .close = ttm_bo_vm_close,
> > - .access = ttm_bo_vm_access
> > + .access = ttm_bo_vm_access,
> > + .mprotect = ttm_bo_vm_mprotect
> >   };
> >
> >   static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
> > b/drivers/gpu/drm/nouveau/nouveau_gem.c
> > index 5b27845075a1..164ea564bb7a 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> > @@ -70,7 +70,8 @@ static const struct vm_operations_struct 
> > nouveau_ttm_vm_ops = {
> >   .fault = nouveau_ttm_fault,
> >   .open = ttm_bo_vm_open,
> >   .close = ttm_bo_vm_close,
> > - .access = ttm_bo_vm_access
> > + .access = ttm_bo_vm_access,
> > + .mprotect = ttm_bo_vm_mprotect
> >   };
> >
> >   void
> > diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
> > b/drivers/gpu/drm/radeon/radeon_gem.c
> > index 458f92a70887..c19ad07eb7b5 100644
> > --- a/drivers/gpu/drm/radeon/radeon_gem.c
> > +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> > @@ -77,7 +77,8 @@ static const struct vm_operations_struct 
> > radeon_gem_vm_ops = {
> >   .fault = radeon_gem_fault,
> >   .open = ttm_bo_vm_open,
> >   .close = ttm_bo_vm_close,
> > - .access = ttm_bo_vm_access
> > + .access = ttm_bo_vm_access,
> > + .mprotect = ttm_bo_vm_mprotect
> >   };
> >
> >   static void radeon_gem_object_free(struct drm_gem_object *gobj)
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
> > b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > index f56be5bc0861..fb325bad5db6 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > @@ -542,17 +542,29 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, 
> > unsigned long addr,
> >   }
> >   EXPORT_SYMBOL(ttm_bo_vm_access);
> >
> > +int ttm_bo_vm_mprotect(struct vm_area_struct *vma, unsigned long start,
> > +unsigned long end, unsigned long newflags)
> > +{
> > + /* Enforce no COW since would have really strange behavior with it. */
> > + if (is_cow_mapping(newflags) && (newflags & VM_WRITE))
> > + return -EINVAL;
> > +
> > + return 0;
> > +}
> > +EXPORT_SYMBOL(ttm_bo_vm_mprotect);
> > +
> >   static const struct vm_operations_struct ttm_bo_vm_ops = {
> >   .fault = ttm_bo_vm_fault,
> >   .open = ttm_bo_vm_open,
> >   .close = ttm_bo_vm_close,
> >   .access = ttm_bo_vm_access,
> > + .mprotect = ttm_bo_vm_mprotect,
> >   };
> >
> >   int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object 
> > *bo)
> >   {
> >   /* Enforce no COW since would have really strange behavior with it. */
> > - if (is_cow_mapping(vma->vm_flags))
> > + if (is_cow_mapping(vma->vm_flags) && (vma->vm_flags & VM_WRITE))
> >   return -EINVAL;
> >
> >   ttm_bo_get(bo);
> > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c 
> > b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
> > index e6b1f98ec99f..e4bf7dc99320 100644
> > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
> > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
> > @@ -61,6 +61,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct 
> > *vma)
> >   .fault = vmw_bo_vm_fault,
> >   .open = ttm_bo_vm_open,
> >   .close = ttm_bo_vm_close,
> > + .mprotect = ttm_bo_vm_mprotect,
> >   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >   .huge_fault = vmw_bo_vm_huge_fault,
> >   #endif
> 

Re: [PATCH 2/2] drm/i915/gem: Migrate to system at dma-buf attach time (v5)

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 04:06:13PM +0100, Matthew Auld wrote:
> On Tue, 13 Jul 2021 at 15:44, Daniel Vetter  wrote:
> >
> > On Mon, Jul 12, 2021 at 06:12:34PM -0500, Jason Ekstrand wrote:
> > > From: Thomas Hellström 
> > >
> > > Until we support p2p dma or as a complement to that, migrate data
> > > to system memory at dma-buf attach time if possible.
> > >
> > > v2:
> > > - Rebase on dynamic exporter. Update the igt_dmabuf_import_same_driver
> > >   selftest to migrate if we are LMEM capable.
> > > v3:
> > > - Migrate also in the pin() callback.
> > > v4:
> > > - Migrate in attach
> > > v5: (jason)
> > > - Lock around the migration
> > >
> > > Signed-off-by: Thomas Hellström 
> > > Signed-off-by: Michael J. Ruhl 
> > > Reported-by: kernel test robot 
> > > Signed-off-by: Jason Ekstrand 
> > > Reviewed-by: Jason Ekstrand 
> > > ---
> > >  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 25 ++-
> > >  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  4 ++-
> > >  2 files changed, 27 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> > > b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > index 9a655f69a0671..3163f00554476 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > @@ -170,8 +170,31 @@ static int i915_gem_dmabuf_attach(struct dma_buf 
> > > *dmabuf,
> > > struct dma_buf_attachment *attach)
> > >  {
> > >   struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> > > + struct i915_gem_ww_ctx ww;
> > > + int err;
> > > +
> > > + for_i915_gem_ww(&ww, err, true) {
> > > + err = i915_gem_object_lock(obj, &ww);
> > > + if (err)
> > > + continue;
> > > +
> > > + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) {
> > > + err = -EOPNOTSUPP;
> > > + continue;
> > > + }
> > > +
> > > + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
> > > + if (err)
> > > + continue;
> > >
> > > - return i915_gem_object_pin_pages_unlocked(obj);
> > > + err = i915_gem_object_wait_migration(obj, 0);
> > > + if (err)
> > > + continue;
> > > +
> > > + err = i915_gem_object_pin_pages(obj);
> > > + }
> > > +
> > > + return err;
> > >  }
> > >
> > >  static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
> > > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c 
> > > b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > index 3dc0f8b3cdab0..4f7e77b1c0152 100644
> > > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > > @@ -106,7 +106,9 @@ static int igt_dmabuf_import_same_driver(void *arg)
> > >   int err;
> > >
> > >   force_different_devices = true;
> > > - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
> > > + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
> >
> > I'm wondering (and couldn't answer) whether this creates an lmem+smem
> > buffer, since if we create an lmem-only buffer then the migration above
> > should fail.
> 
> It's lmem-only, but it's also a kernel internal object, so the
> migration path will still happily migrate it if asked. On the other
> hand if it's a userspace object then we always have to respect the
> placements.
> 
> I think for now the only usecase for that is in the selftests.

Yeah I've read the kerneldoc, it's all nicely documented but feels a bit
dangerous. What I proposed on irc:
- i915_gem_object_migrate does the placement check, i.e. as strict as
  can_migrate.
- A new __i915_gem_object_migrate is for selftest that do special stuff.
- In the import selftest we check that lmem-only fails (because we can't
  pin it into smem) for a non-dynamic importer, but lmem+smem works and
  gets migrated.
- Once we have dynamic dma-buf for p2p pci, then we'll have another
  selftest which checks that things work for lmem only if and only if the
  importer is dynamic and has set the allow_p2p flag.

We could also add the can_migrate check everywhere (including
dma_buf->attach), but that feels like the less safe API.
-Daniel
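
A rough sketch of the split proposed above, using the names from that
list (signature assumed, not the current i915 code):

int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
			    struct i915_gem_ww_ctx *ww,
			    enum intel_region_id id)
{
	/* strict placement check for ordinary callers */
	if (!i915_gem_object_can_migrate(obj, id))
		return -EINVAL;

	/* __i915_gem_object_migrate() would skip the check for selftests */
	return __i915_gem_object_migrate(obj, ww, id);
}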


> 
> >
> > Which I'm also not sure we have a testcase for that testcase either ...
> >
> > I tried to read some code here, but got a bit lost. Ideas?
> > -Daniel
> >
> > > + if (IS_ERR(obj))
> > > + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
> > >   if (IS_ERR(obj))
> > >   goto out_ret;
> > >
> > > --
> > > 2.31.1
> > >
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 2/2] drm/i915/gem: Migrate to system at dma-buf attach time (v5)

2021-07-13 Thread Matthew Auld
On Tue, 13 Jul 2021 at 15:44, Daniel Vetter  wrote:
>
> On Mon, Jul 12, 2021 at 06:12:34PM -0500, Jason Ekstrand wrote:
> > From: Thomas Hellström 
> >
> > Until we support p2p dma or as a complement to that, migrate data
> > to system memory at dma-buf attach time if possible.
> >
> > v2:
> > - Rebase on dynamic exporter. Update the igt_dmabuf_import_same_driver
> >   selftest to migrate if we are LMEM capable.
> > v3:
> > - Migrate also in the pin() callback.
> > v4:
> > - Migrate in attach
> > v5: (jason)
> > - Lock around the migration
> >
> > Signed-off-by: Thomas Hellström 
> > Signed-off-by: Michael J. Ruhl 
> > Reported-by: kernel test robot 
> > Signed-off-by: Jason Ekstrand 
> > Reviewed-by: Jason Ekstrand 
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 25 ++-
> >  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  4 ++-
> >  2 files changed, 27 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > index 9a655f69a0671..3163f00554476 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > @@ -170,8 +170,31 @@ static int i915_gem_dmabuf_attach(struct dma_buf 
> > *dmabuf,
> > struct dma_buf_attachment *attach)
> >  {
> >   struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> > + struct i915_gem_ww_ctx ww;
> > + int err;
> > +
> > + for_i915_gem_ww(&ww, err, true) {
> > + err = i915_gem_object_lock(obj, &ww);
> > + if (err)
> > + continue;
> > +
> > + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) {
> > + err = -EOPNOTSUPP;
> > + continue;
> > + }
> > +
> > + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
> > + if (err)
> > + continue;
> >
> > - return i915_gem_object_pin_pages_unlocked(obj);
> > + err = i915_gem_object_wait_migration(obj, 0);
> > + if (err)
> > + continue;
> > +
> > + err = i915_gem_object_pin_pages(obj);
> > + }
> > +
> > + return err;
> >  }
> >
> >  static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
> > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c 
> > b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > index 3dc0f8b3cdab0..4f7e77b1c0152 100644
> > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> > @@ -106,7 +106,9 @@ static int igt_dmabuf_import_same_driver(void *arg)
> >   int err;
> >
> >   force_different_devices = true;
> > - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
> > + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
>
> I'm wondering (and couldn't answer) whether this creates an lmem+smem
> buffer, since if we create an lmem-only buffer then the migration above
> should fail.

It's lmem-only, but it's also a kernel internal object, so the
migration path will still happily migrate it if asked. On the other
hand if it's a userspace object then we always have to respect the
placements.

I think for now the only usecase for that is in the selftests.

>
> Which I'm also not sure we have a testcase for that testcase either ...
>
> I tried to read some code here, but got a bit lost. Ideas?
> -Daniel
>
> > + if (IS_ERR(obj))
> > + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
> >   if (IS_ERR(obj))
> >   goto out_ret;
> >
> > --
> > 2.31.1
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch


Re: [PATCH 2/2] drm/i915/gem: Migrate to system at dma-buf attach time (v5)

2021-07-13 Thread Daniel Vetter
On Mon, Jul 12, 2021 at 06:12:34PM -0500, Jason Ekstrand wrote:
> From: Thomas Hellström 
> 
> Until we support p2p dma or as a complement to that, migrate data
> to system memory at dma-buf attach time if possible.
> 
> v2:
> - Rebase on dynamic exporter. Update the igt_dmabuf_import_same_driver
>   selftest to migrate if we are LMEM capable.
> v3:
> - Migrate also in the pin() callback.
> v4:
> - Migrate in attach
> v5: (jason)
> - Lock around the migration
> 
> Signed-off-by: Thomas Hellström 
> Signed-off-by: Michael J. Ruhl 
> Reported-by: kernel test robot 
> Signed-off-by: Jason Ekstrand 
> Reviewed-by: Jason Ekstrand 
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c| 25 ++-
>  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  4 ++-
>  2 files changed, 27 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> index 9a655f69a0671..3163f00554476 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> @@ -170,8 +170,31 @@ static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
> struct dma_buf_attachment *attach)
>  {
>   struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> + struct i915_gem_ww_ctx ww;
> + int err;
> +
> + for_i915_gem_ww(&ww, err, true) {
> + err = i915_gem_object_lock(obj, &ww);
> + if (err)
> + continue;
> +
> + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) {
> + err = -EOPNOTSUPP;
> + continue;
> + }
> +
> + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
> + if (err)
> + continue;
>  
> - return i915_gem_object_pin_pages_unlocked(obj);
> + err = i915_gem_object_wait_migration(obj, 0);
> + if (err)
> + continue;
> +
> + err = i915_gem_object_pin_pages(obj);
> + }
> +
> + return err;
>  }
>  
>  static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c 
> b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> index 3dc0f8b3cdab0..4f7e77b1c0152 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> @@ -106,7 +106,9 @@ static int igt_dmabuf_import_same_driver(void *arg)
>   int err;
>  
>   force_different_devices = true;
> - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
> + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);

I'm wondering (and couldn't answer) whether this creates an lmem+smem
buffer, since if we create an lmem-only buffer then the migration above
should fail.

Which I'm also not sure we have a testcase for that testcase either ...

I tried to read some code here, but got a bit lost. Ideas?
-Daniel

> + if (IS_ERR(obj))
> + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
>   if (IS_ERR(obj))
>   goto out_ret;
>  
> -- 
> 2.31.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 1/2] drm/i915/gem: Correct the locking and pin pattern for dma-buf (v5)

2021-07-13 Thread Jason Ekstrand
On Tue, Jul 13, 2021 at 9:40 AM Daniel Vetter  wrote:
>
> On Mon, Jul 12, 2021 at 06:12:33PM -0500, Jason Ekstrand wrote:
> > From: Thomas Hellström 
> >
> > If our exported dma-bufs are imported by another instance of our driver,
> > that instance will typically have the imported dma-bufs locked during
> > dma_buf_map_attachment(). But the exporter also locks the same reservation
> > object in the map_dma_buf() callback, which leads to recursive locking.
> >
> > So taking the lock inside _pin_pages_unlocked() is incorrect.
> >
> > Additionally, the current pinning code path is contrary to the defined
> > way that pinning should occur.
> >
> > Remove the explicit pin/unpin from the map/umap functions and move them
> > to the attach/detach allowing correct locking to occur, and to match
> > the static dma-buf drm_prime pattern.
> >
> > Add a live selftest to exercise both dynamic and non-dynamic
> > exports.
> >
> > v2:
> > - Extend the selftest with a fake dynamic importer.
> > - Provide real pin and unpin callbacks to not abuse the interface.
> > v3: (ruhl)
> > - Remove the dynamic export support and move the pinning into the
> >   attach/detach path.
> > v4: (ruhl)
> > - Put pages does not need to assert on the dma-resv
> > v5: (jason)
> > - Lock around dma_buf_unmap_attachment() when emulating a dynamic
> >   importer in the subtests.
> > - Use pin_pages_unlocked
> >
> > Reported-by: Michael J. Ruhl 
> > Signed-off-by: Thomas Hellström 
> > Signed-off-by: Michael J. Ruhl 
> > Signed-off-by: Jason Ekstrand 
> > Reviewed-by: Jason Ekstrand 
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  43 +--
> >  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  | 118 +-
> >  2 files changed, 147 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > index 616c3a2f1baf0..9a655f69a0671 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > @@ -12,6 +12,8 @@
> >  #include "i915_gem_object.h"
> >  #include "i915_scatterlist.h"
> >
> > +I915_SELFTEST_DECLARE(static bool force_different_devices;)
> > +
> >  static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
> >  {
> >   return to_intel_bo(buf->priv);
> > @@ -25,15 +27,11 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
> > dma_buf_attachment *attachme
> >   struct scatterlist *src, *dst;
> >   int ret, i;
> >
> > - ret = i915_gem_object_pin_pages_unlocked(obj);
> > - if (ret)
> > - goto err;
> > -
> >   /* Copy sg so that we make an independent mapping */
> >   st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
> >   if (st == NULL) {
> >   ret = -ENOMEM;
> > - goto err_unpin_pages;
> > + goto err;
> >   }
> >
> >   ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
> > @@ -58,8 +56,6 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
> > dma_buf_attachment *attachme
> >   sg_free_table(st);
> >  err_free:
> >   kfree(st);
> > -err_unpin_pages:
> > - i915_gem_object_unpin_pages(obj);
> >  err:
> >   return ERR_PTR(ret);
> >  }
> > @@ -68,13 +64,9 @@ static void i915_gem_unmap_dma_buf(struct 
> > dma_buf_attachment *attachment,
> >  struct sg_table *sg,
> >  enum dma_data_direction dir)
> >  {
> > - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
> > -
> >   dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC);
> >   sg_free_table(sg);
> >   kfree(sg);
> > -
> > - i915_gem_object_unpin_pages(obj);
> >  }
> >
> >  static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct 
> > dma_buf_map *map)
> > @@ -168,7 +160,31 @@ static int i915_gem_end_cpu_access(struct dma_buf 
> > *dma_buf, enum dma_data_direct
> >   return err;
> >  }
> >
> > +/**
> > + * i915_gem_dmabuf_attach - Do any extra attach work necessary
> > + * @dmabuf: imported dma-buf
> > + * @attach: new attach to do work on
> > + *
> > + */
> > +static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
> > +   struct dma_buf_attachment *attach)
> > +{
> > + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> > +
> > + return i915_gem_object_pin_pages_unlocked(obj);
> > +}
> > +
> > +static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
> > +struct dma_buf_attachment *attach)
> > +{
> > + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> > +
> > + i915_gem_object_unpin_pages(obj);
> > +}
> > +
> >  static const struct dma_buf_ops i915_dmabuf_ops =  {
> > + .attach = i915_gem_dmabuf_attach,
> > + .detach = i915_gem_dmabuf_detach,
> >   .map_dma_buf = i915_gem_map_dma_buf,
> >   .unmap_dma_buf = i915_gem_unmap_dma_buf,
> >   .release = 

Re: [PATCH 1/2] drm/i915/gem: Correct the locking and pin pattern for dma-buf (v5)

2021-07-13 Thread Daniel Vetter
On Mon, Jul 12, 2021 at 06:12:33PM -0500, Jason Ekstrand wrote:
> From: Thomas Hellström 
> 
> If our exported dma-bufs are imported by another instance of our driver,
> that instance will typically have the imported dma-bufs locked during
> dma_buf_map_attachment(). But the exporter also locks the same reservation
> object in the map_dma_buf() callback, which leads to recursive locking.
> 
> So taking the lock inside _pin_pages_unlocked() is incorrect.
> 
> Additionally, the current pinning code path is contrary to the defined
> way that pinning should occur.
> 
> Remove the explicit pin/unpin from the map/umap functions and move them
> to the attach/detach allowing correct locking to occur, and to match
> the static dma-buf drm_prime pattern.
> 
> Add a live selftest to exercise both dynamic and non-dynamic
> exports.
> 
> v2:
> - Extend the selftest with a fake dynamic importer.
> - Provide real pin and unpin callbacks to not abuse the interface.
> v3: (ruhl)
> - Remove the dynamic export support and move the pinning into the
>   attach/detach path.
> v4: (ruhl)
> - Put pages does not need to assert on the dma-resv
> v5: (jason)
> - Lock around dma_buf_unmap_attachment() when emulating a dynamic
>   importer in the subtests.
> - Use pin_pages_unlocked
> 
> Reported-by: Michael J. Ruhl 
> Signed-off-by: Thomas Hellström 
> Signed-off-by: Michael J. Ruhl 
> Signed-off-by: Jason Ekstrand 
> Reviewed-by: Jason Ekstrand 
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c|  43 +--
>  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  | 118 +-
>  2 files changed, 147 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> index 616c3a2f1baf0..9a655f69a0671 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> @@ -12,6 +12,8 @@
>  #include "i915_gem_object.h"
>  #include "i915_scatterlist.h"
>  
> +I915_SELFTEST_DECLARE(static bool force_different_devices;)
> +
>  static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
>  {
>   return to_intel_bo(buf->priv);
> @@ -25,15 +27,11 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
> dma_buf_attachment *attachme
>   struct scatterlist *src, *dst;
>   int ret, i;
>  
> - ret = i915_gem_object_pin_pages_unlocked(obj);
> - if (ret)
> - goto err;
> -
>   /* Copy sg so that we make an independent mapping */
>   st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
>   if (st == NULL) {
>   ret = -ENOMEM;
> - goto err_unpin_pages;
> + goto err;
>   }
>  
>   ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
> @@ -58,8 +56,6 @@ static struct sg_table *i915_gem_map_dma_buf(struct 
> dma_buf_attachment *attachme
>   sg_free_table(st);
>  err_free:
>   kfree(st);
> -err_unpin_pages:
> - i915_gem_object_unpin_pages(obj);
>  err:
>   return ERR_PTR(ret);
>  }
> @@ -68,13 +64,9 @@ static void i915_gem_unmap_dma_buf(struct 
> dma_buf_attachment *attachment,
>  struct sg_table *sg,
>  enum dma_data_direction dir)
>  {
> - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
> -
>   dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC);
>   sg_free_table(sg);
>   kfree(sg);
> -
> - i915_gem_object_unpin_pages(obj);
>  }
>  
>  static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map 
> *map)
> @@ -168,7 +160,31 @@ static int i915_gem_end_cpu_access(struct dma_buf 
> *dma_buf, enum dma_data_direct
>   return err;
>  }
>  
> +/**
> + * i915_gem_dmabuf_attach - Do any extra attach work necessary
> + * @dmabuf: imported dma-buf
> + * @attach: new attach to do work on
> + *
> + */
> +static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
> +   struct dma_buf_attachment *attach)
> +{
> + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> +
> + return i915_gem_object_pin_pages_unlocked(obj);
> +}
> +
> +static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
> +struct dma_buf_attachment *attach)
> +{
> + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
> +
> + i915_gem_object_unpin_pages(obj);
> +}
> +
>  static const struct dma_buf_ops i915_dmabuf_ops =  {
> + .attach = i915_gem_dmabuf_attach,
> + .detach = i915_gem_dmabuf_detach,
>   .map_dma_buf = i915_gem_map_dma_buf,
>   .unmap_dma_buf = i915_gem_unmap_dma_buf,
>   .release = drm_gem_dmabuf_release,
> @@ -204,6 +220,8 @@ static int i915_gem_object_get_pages_dmabuf(struct 
> drm_i915_gem_object *obj)
>   struct sg_table *pages;
>   unsigned int sg_page_sizes;
>  
> + assert_object_held(obj);
> +
>   pages = dma_buf_map_attachment(obj->base.import_attach,
>  

[PATCH v2 3/4] drm/amd/display: Add control mechanism for FPU utilization

2021-07-13 Thread Rodrigo Siqueira
DC invokes DC_FPU_START/END in multiple parts of the code; this can
create situations where we invoke this FPU operation in a nested way or
exit too early. To avoid this, this commit adds a mechanism where
dc_fpu_begin/end manage the access to kernel_fpu_begin/end.
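
To illustrate the intended behaviour (hypothetical callers, only the
macro semantics come from this patch): only the outermost
DC_FP_START()/DC_FP_END() pair on a CPU actually toggles
kernel_fpu_begin()/kernel_fpu_end(), so nested regions become harmless:

static void inner(void)
{
	DC_FP_START();	/* depth 1 -> 2: kernel_fpu_begin() is not called again */
	/* ... FPU math ... */
	DC_FP_END();	/* depth 2 -> 1: FPU context stays enabled */
}

static void outer(void)
{
	DC_FP_START();	/* depth 0 -> 1: kernel_fpu_begin() */
	inner();	/* nested use is now safe */
	DC_FP_END();	/* depth 1 -> 0: kernel_fpu_end() */
}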

Changes since V1:
- Use better variable names
- Use get_cpu_ptr and put_cpu_ptr to better balance preemption enable
and disable

Signed-off-by: Rodrigo Siqueira 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   | 13 ---
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 36 ---
 drivers/gpu/drm/amd/display/dc/dc_trace.h |  4 +--
 3 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
index 230bb12c405e..fdcaea22b456 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -638,23 +638,26 @@ TRACE_EVENT(amdgpu_refresh_rate_track,
 );
 
 TRACE_EVENT(dcn_fpu,
-   TP_PROTO(bool begin, const char *function, const int line),
-   TP_ARGS(begin, function, line),
+   TP_PROTO(bool begin, const char *function, const int line, const 
int recursion_depth),
+   TP_ARGS(begin, function, line, recursion_depth),
 
TP_STRUCT__entry(
 __field(bool, begin)
 __field(const char *, function)
 __field(int, line)
+__field(int, recursion_depth)
),
TP_fast_assign(
   __entry->begin = begin;
   __entry->function = function;
   __entry->line = line;
+  __entry->recursion_depth = recursion_depth;
),
-   TP_printk("%s()+%d: %s",
+   TP_printk("%s: recursion_depth: %d: %s()+%d:",
+ __entry->begin ? "begin" : "end",
+ __entry->recursion_depth,
  __entry->function,
- __entry->line,
- __entry->begin ? "begin" : "end"
+ __entry->line
)
 );
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index d5d156a4517e..73179e9e859a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -28,6 +28,19 @@
 
 #include 
 
+/**
+ * DOC: DC FPU manipulation overview
+ *
+ * DC core uses FPU operations in multiple parts of the code, which requires a
+ * more specialized way to manage these areas' entrance. To fulfill this
+ * requirement, we created some wrapper functions that encapsulate
+ * kernel_fpu_begin/end to better fit our need in the display component. In
+ * summary, in this file, you can find functions related to FPU operation
+ * management.
+ */
+
+static DEFINE_PER_CPU(int, fpu_recursion_depth);
+
 /**
  * dc_fpu_begin - Enables FPU protection
  * @function_name: A string containing the function name for debug purposes
@@ -43,8 +56,16 @@
  */
 void dc_fpu_begin(const char *function_name, const int line)
 {
-   TRACE_DCN_FPU(true, function_name, line);
-   kernel_fpu_begin();
+   int *pcpu;
+
+   pcpu = get_cpu_ptr(&fpu_recursion_depth);
+   *pcpu = this_cpu_inc_return(fpu_recursion_depth);
+
+   if (*pcpu == 1)
+   kernel_fpu_begin();
+
+   TRACE_DCN_FPU(true, function_name, line, *pcpu);
+   put_cpu_ptr(&fpu_recursion_depth);
 }
 
 /**
@@ -59,6 +80,13 @@ void dc_fpu_begin(const char *function_name, const int line)
  */
 void dc_fpu_end(const char *function_name, const int line)
 {
-   TRACE_DCN_FPU(false, function_name, line);
-   kernel_fpu_end();
+   int *pcpu;
+
+   pcpu = get_cpu_ptr(&fpu_recursion_depth);
+   *pcpu = this_cpu_dec_return(fpu_recursion_depth);
+   if (*pcpu <= 0)
+   kernel_fpu_end();
+
+   TRACE_DCN_FPU(false, function_name, line, *pcpu);
+   put_cpu_ptr(&fpu_recursion_depth);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dc_trace.h 
b/drivers/gpu/drm/amd/display/dc/dc_trace.h
index d598ba697e45..c711797e5c9e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_trace.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_trace.h
@@ -38,5 +38,5 @@
 #define TRACE_DCN_CLOCK_STATE(dcn_clocks) \
trace_amdgpu_dm_dc_clocks_state(dcn_clocks)
 
-#define TRACE_DCN_FPU(begin, function, line) \
-   trace_dcn_fpu(begin, function, line)
+#define TRACE_DCN_FPU(begin, function, line, ref_count) \
+   trace_dcn_fpu(begin, function, line, ref_count)
-- 
2.25.1



[PATCH v2 4/4] drm/amd/display: Add DC_FP helper to check FPU state

2021-07-13 Thread Rodrigo Siqueira
To fully isolate FPU operations in a single place, we must avoid
situations where compilers spill FP values to registers because the FPU
was enabled in a specific C file. Note that even if we isolate all FPU
functions in a single file and call its interface from other files, the
compiler might enable the use of the FPU before we call DC_FP_START.
Nevertheless, it is the programmer's responsibility to invoke
DC_FP_START/END in the correct place. To highlight situations where
developers forgot to use the FP protection before calling the DC FPU
interface functions, we introduce a helper that checks whether the
function is invoked under FP protection. If not, it will trigger a
kernel warning.
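
The expected calling pattern then looks roughly like this (function names
are hypothetical; only dc_assert_fp_enabled() and the DC_FP macros come
from this series):

/* Public FPU-using interface, e.g. under dc/fpu_operations/ */
void dcn2x_public_fpu_helper(struct dc *dc)
{
	/* Flags callers that forgot to enter the FP-protected region first. */
	ASSERT(dc_assert_fp_enabled());
	/* ... floating-point calculations ... */
}

/* Caller outside the FPU directory */
void some_caller(struct dc *dc)
{
	DC_FP_START();
	dcn2x_public_fpu_helper(dc);
	DC_FP_END();
}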

Changes since V1:
- Remove fp_enable variables
- Rename dc_is_fp_enabled to dc_assert_fp_enabled
- Replace wrong variable type

Signed-off-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 22 +++
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.h|  1 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |  2 ++
 .../drm/amd/display/dc/fpu_operations/dcn2x.c | 17 ++
 4 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 73179e9e859a..74153a2816f9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -41,6 +41,28 @@
 
 static DEFINE_PER_CPU(int, fpu_recursion_depth);
 
+/**
+ * dc_assert_fp_enabled - Check if FPU protection is enabled
+ *
+ * This function tells if the code is already under FPU protection or not. A
+ * function that works as an API for a set of FPU operations can use this
+ * function for checking if the caller invoked it after DC_FP_START(). For
+ * example, take a look at dcn2x.c file.
+ *
+ * Return:
+ * Return true if we already enabled FPU protection, otherwise return false.
+ */
+inline bool dc_assert_fp_enabled(void)
+{
+   int *pcpu, depth = 0;
+
+   pcpu = get_cpu_ptr(&fpu_recursion_depth);
+   depth = this_cpu_read(fpu_recursion_depth);
+   put_cpu_ptr(&fpu_recursion_depth);
+
+   return depth > 1;
+}
+
 /**
  * dc_fpu_begin - Enables FPU protection
  * @function_name: A string containing the function name for debug purposes
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
index fb54983c5c60..97941794b77c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h
@@ -27,6 +27,7 @@
 #ifndef __DC_FPU_H__
 #define __DC_FPU_H__
 
+bool dc_assert_fp_enabled(void);
 void dc_fpu_begin(const char *function_name, const int line);
 void dc_fpu_end(const char *function_name, const int line);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index f99b09643a52..d0b34c7f99dc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -2355,7 +2355,9 @@ int dcn20_populate_dml_pipes_from_context(
}
 
/* populate writeback information */
+   DC_FP_START();
dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, 
pipes);
+   DC_FP_END();
 
return pipe_cnt;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c 
b/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
index c815d6c01d64..d8183da0c2b0 100644
--- a/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
+++ b/drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
@@ -41,6 +41,22 @@
  *that deals with FP register is contained within this call.
  * 3. All function that needs to be accessed outside this file requires a
  *public interface that not uses any FPU reference.
+ * 4. Developers should not use DC_FP_START/END in this file, but they need to
+ *ensure that the caller invokes it before access any function available in
+ *this file. For this reason, public API in this file must invoke
+ *ASSERT(dc_assert_fp_enabled());
+ *
+ * Let's expand a little bit more on the idea in code pattern number four. To
+ * fully isolate FPU operations in a single place, we must avoid situations
+ * where compilers spill FP values to registers due to FP enable in a specific
+ * C file. Note that even if we isolate all FPU functions in a single file and
+ * call its interface from other files, the compiler might enable the use of
+ * FPU before we call DC_FP_START. Nevertheless, it is the programmer's
+ * responsibility to invoke DC_FP_START/END in the correct place. To highlight
+ * situations where developers forgot to use the FP protection before calling
+ * the DC FPU interface functions, we introduce a helper that checks if the
+ * function is invoked under FP protection. If not, it will trigger a kernel
+ * warning.
  */
 
 static noinline void _dcn20_populate_dml_writeback_from_context(struct dc *dc,
@@ -83,5 +99,6 @@ static 

[PATCH v2 2/4] drm/amd/display: Add FPU event trace

2021-07-13 Thread Rodrigo Siqueira
We don't have any mechanism for tracing FPU operations inside the
display core, which makes debugging a little bit tricky. This commit
introduces a trace mechanism inside our DC_FP_START/END macros to help
alleviate this problem.

Signed-off-by: Rodrigo Siqueira 
---
 .../gpu/drm/amd/display/amdgpu_dm/Makefile|  3 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   | 21 ++
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 64 +++
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.h| 33 ++
 drivers/gpu/drm/amd/display/dc/dc_trace.h |  3 +
 drivers/gpu/drm/amd/display/dc/os_types.h |  6 +-
 6 files changed, 126 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile 
b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 91fb72c96545..5f7fd4474379 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,7 +25,8 @@
 
 
 
-AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o
+AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o 
\
+   dc_fpu.o
 
 ifneq ($(CONFIG_DRM_AMD_DC),)
 AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o 
amdgpu_dm_psr.o
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
index 46a33f64cf8e..230bb12c405e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -637,6 +637,27 @@ TRACE_EVENT(amdgpu_refresh_rate_track,
  __entry->refresh_rate_ns)
 );
 
+TRACE_EVENT(dcn_fpu,
+   TP_PROTO(bool begin, const char *function, const int line),
+   TP_ARGS(begin, function, line),
+
+   TP_STRUCT__entry(
+__field(bool, begin)
+__field(const char *, function)
+__field(int, line)
+   ),
+   TP_fast_assign(
+  __entry->begin = begin;
+  __entry->function = function;
+  __entry->line = line;
+   ),
+   TP_printk("%s()+%d: %s",
+ __entry->function,
+ __entry->line,
+ __entry->begin ? "begin" : "end"
+   )
+);
+
 #endif /* _AMDGPU_DM_TRACE_H_ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
new file mode 100644
index ..d5d156a4517e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dc_trace.h"
+
+#include 
+
+/**
+ * dc_fpu_begin - Enables FPU protection
+ * @function_name: A string containing the function name for debug purposes
+ *   (usually __func__)
+ *
+ * @line: A line number where DC_FP_START was invoked for debug purpose
+ *   (usually __LINE__)
+ *
+ * This function is responsible for managing the use of kernel_fpu_begin() with
+ * the advantage of providing an event trace for debugging.
+ *
+ * Note: Do not call this function directly; always use DC_FP_START().
+ */
+void dc_fpu_begin(const char *function_name, const int line)
+{
+   TRACE_DCN_FPU(true, function_name, line);
+   kernel_fpu_begin();
+}
+
+/**
+ * dc_fpu_end - Disable FPU protection
+ * @function_name: A string containing the function name for debug purposes
+ * @line: A line number where DC_FP_END was invoked for debug purpose
+ *
+ * This function is responsible for managing the use of kernel_fpu_end() 

[PATCH v2 1/4] drm/amd/display: Introduce FPU directory inside DC

2021-07-13 Thread Rodrigo Siqueira
The display core files rely on FPU operations, which require them to be
compiled with special flags. Ideally, we don't want these FPU operations
spread around the DC code; nevertheless, it happens in the current
source. This commit introduces a new directory named fpu_operations that
intends to centralize all files that require the FPU compilation flag.
As part of this new component, this patch also moves one of the
functions that require FPU access to a single shared file. Notice that
this is the first part of the work, and it does not fix the FPU issue
yet; we still need other patches for achieving the complete isolation of
this file.
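
As an illustration of the resulting calling convention (sketch only, not a
hunk from this patch): the FPU-heavy helper now lives in
fpu_operations/dcn2x.c, and every caller brackets it with the existing FP
protection macros:

	DC_FP_START();
	dcn20_populate_dml_writeback_from_context(dc, &context->res_ctx, pipes);
	DC_FP_END();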

Changes since V1:
- Update documentation and rebase.

Signed-off-by: Rodrigo Siqueira 
---
 drivers/gpu/drm/amd/display/dc/Makefile   |  1 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c | 39 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.h |  2 -
 .../drm/amd/display/dc/dcn21/dcn21_resource.c |  2 +
 .../amd/display/dc/fpu_operations/Makefile| 58 +
 .../drm/amd/display/dc/fpu_operations/dcn2x.c | 87 +++
 .../drm/amd/display/dc/fpu_operations/dcn2x.h | 33 +++
 7 files changed, 183 insertions(+), 39 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/Makefile
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/fpu_operations/dcn2x.h

diff --git a/drivers/gpu/drm/amd/display/dc/Makefile 
b/drivers/gpu/drm/amd/display/dc/Makefile
index 943fcb164876..93e731a9be68 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -37,6 +37,7 @@ DC_LIBS += dcn303
 DC_LIBS += dcn31
 endif
 
+DC_LIBS += fpu_operations
 DC_LIBS += dce120
 
 DC_LIBS += dce112
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 1b05a37b674d..f99b09643a52 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -35,6 +35,8 @@
 #include "include/irq_service_interface.h"
 #include "dcn20/dcn20_resource.h"
 
+#include "fpu_operations/dcn2x.h"
+
 #include "dcn10/dcn10_hubp.h"
 #include "dcn10/dcn10_ipp.h"
 #include "dcn20_hubbub.h"
@@ -1974,43 +1976,6 @@ void dcn20_split_stream_for_mpc(
ASSERT(primary_pipe->plane_state);
 }
 
-void dcn20_populate_dml_writeback_from_context(
-   struct dc *dc, struct resource_context *res_ctx, 
display_e2e_pipe_params_st *pipes)
-{
-   int pipe_cnt, i;
-
-   for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
-   struct dc_writeback_info *wb_info = 
&res_ctx->pipe_ctx[i].stream->writeback_info[0];
-
-   if (!res_ctx->pipe_ctx[i].stream)
-   continue;
-
-   /* Set writeback information */
-   pipes[pipe_cnt].dout.wb_enable = (wb_info->wb_enabled == true) 
? 1 : 0;
-   pipes[pipe_cnt].dout.num_active_wb++;
-   pipes[pipe_cnt].dout.wb.wb_src_height = 
wb_info->dwb_params.cnv_params.crop_height;
-   pipes[pipe_cnt].dout.wb.wb_src_width = 
wb_info->dwb_params.cnv_params.crop_width;
-   pipes[pipe_cnt].dout.wb.wb_dst_width = 
wb_info->dwb_params.dest_width;
-   pipes[pipe_cnt].dout.wb.wb_dst_height = 
wb_info->dwb_params.dest_height;
-   pipes[pipe_cnt].dout.wb.wb_htaps_luma = 1;
-   pipes[pipe_cnt].dout.wb.wb_vtaps_luma = 1;
-   pipes[pipe_cnt].dout.wb.wb_htaps_chroma = 
wb_info->dwb_params.scaler_taps.h_taps_c;
-   pipes[pipe_cnt].dout.wb.wb_vtaps_chroma = 
wb_info->dwb_params.scaler_taps.v_taps_c;
-   pipes[pipe_cnt].dout.wb.wb_hratio = 1.0;
-   pipes[pipe_cnt].dout.wb.wb_vratio = 1.0;
-   if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) {
-   if (wb_info->dwb_params.output_depth == 
DWB_OUTPUT_PIXEL_DEPTH_8BPC)
-   pipes[pipe_cnt].dout.wb.wb_pixel_format = 
dm_420_8;
-   else
-   pipes[pipe_cnt].dout.wb.wb_pixel_format = 
dm_420_10;
-   } else
-   pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_444_32;
-
-   pipe_cnt++;
-   }
-
-}
-
 int dcn20_populate_dml_pipes_from_context(
struct dc *dc,
struct dc_state *context,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
index c8f3127bbcdf..6ec8ff45f0f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h
@@ -58,8 +58,6 @@ struct pipe_ctx *dcn20_acquire_idle_pipe_for_layer(
struct dc_state *state,
const struct resource_pool *pool,
struct dc_stream_state *stream);
-void 

[PATCH v2 0/4] drm/amd/display: Base changes for isolating FPU operation in a single place

2021-07-13 Thread Rodrigo Siqueira
Hi,

In the display core, we use float and double operations for calculating
modesetting parameters. One side effect of this double-precision approach
is that we spread multiple FPU accesses across our driver, which means
that we can accidentally clobber the user-space FPU state.

# Challenges

1. Keep in mind that this FPU code is ingrained in our display driver and
performs several crucial tasks. Additionally, we already have multiple
architectures available in the kernel and a large set of users; in other words,
we prefer to avoid a radical approach that might break our user's system.

2. We share our display code with other OSs; thus, we need to maintain the
interoperability between these two systems.

3. We need a mechanism for identifying which function uses FPU registers;
fortunately, Peter Zijlstra wrote a series a couple of months ago where he
introduced an FPU check for objtool. I used the following command for
identifying the potential FPU usage:

 ./tools/objtool/objtool check -Ffa "drivers/gpu/drm/amd/display/dc/ANY_FILE.o"

4. Since our code heavily relies on FPU and the fact that we spread
kernel_fpu_begin/end across multiple functions, we can have some complex
scenarios that will require code refactoring. However, we want to avoid
complicated changes since this is a formula to introduce regressions; we want
something that allows us to fix it in small, safe, and reliable steps.

5. Unfortunately, for legacy reasons, we have some problems in how we program
our FPU accesses, which in some weird scenarios can generate situations where we
try to enter FPU mode multiple times or exit too early.

# Our approach

For trying to solve this problem, we came up with the following strategy:

1. Keep in mind that we are using kernel_fpu_begin/end spread in various areas
and sometimes across multiple functions. If we try to move some of the
functions to an isolated place, we can end up invoking the FPU protection
more than once, causing multiple warnings. We can deal with this problem by
adding a thin management layer around the kernel_fpu_begin/end used inside
the display (see the sketch after this list).

2. We will need a trace mechanism for this FPU management inside our display
code.

3. After we get the thin layer that manages FPU, we can start to move each
function that uses FPU to a centralized place. Our DQE runs multiple tests in
different ASICs every week; we can take advantage of this to ensure that our
FPU patches do not introduce any regression. The idea is to work on a
specific part of the code every week (e.g., week 1: DCN2, week 2: DCN2.1,
etc.).

4. Finally, after we can isolate the FPU operations in a single place, we can
altogether remove the FPU flags from other files and eliminate the
unnecessary code introduced to deal with this problem. We can also remove
the thin layer added in step 3.
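
To make item 1 above concrete, the thin management layer amounts to
something like the sketch below (illustrative only; the real implementation
lands in the "Add control mechanism for FPU utilization" patch, and the name
fpu_recursion_depth is just a placeholder):

static DEFINE_PER_CPU(int, fpu_recursion_depth);

void dc_fpu_begin(const char *function_name, const int line)
{
	int depth;

	/* Stay on one CPU so the per-CPU counter and kernel_fpu_begin()
	 * refer to the same FPU state. */
	preempt_disable();
	depth = __this_cpu_inc_return(fpu_recursion_depth);
	if (depth == 1)
		kernel_fpu_begin();
}

void dc_fpu_end(const char *function_name, const int line)
{
	int depth;

	depth = __this_cpu_dec_return(fpu_recursion_depth);
	if (depth == 0)
		kernel_fpu_end();
	preempt_enable();
}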

# This series

To maintain the interoperability between multiple OSes, we already have a
define named DC_FP_START/END, which is a straightforward wrapper to
kernel_fpu_begin/end on the Linux side. In this series, I decided to expand the
scope of this DC_FP_* wrapper to trace FPU entrance and exit in the display
code, but I also add a mechanism for managing the entrance and exit of
kernel_fpu_begin/end. You can see the details on how I did that in the last two
patches.

I also isolate a simple function that requires FPU access to demonstrate my
strategy for isolating this FPU access in a single place. If this series gets
accepted, the following steps consist of moving all FPU functions weekly until
we isolate everything in the fpu_operations folder.

Changes since V1:
- Use a better name for variables.
- Update documentation.
- Avoid preemption.

* See update details per commit message

Best Regards
Rodrigo Siqueira

Rodrigo Siqueira (4):
  drm/amd/display: Introduce FPU directory inside DC
  drm/amd/display: Add FPU event trace
  drm/amd/display: Add control mechanism for FPU utilization
  drm/amd/display: Add DC_FP helper to check FPU state

 .../gpu/drm/amd/display/amdgpu_dm/Makefile|   3 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   |  24 
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c| 114 ++
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.h|  34 ++
 drivers/gpu/drm/amd/display/dc/Makefile   |   1 +
 drivers/gpu/drm/amd/display/dc/dc_trace.h |   3 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |  41 +--
 .../drm/amd/display/dc/dcn20/dcn20_resource.h |   2 -
 .../drm/amd/display/dc/dcn21/dcn21_resource.c |   2 +
 .../amd/display/dc/fpu_operations/Makefile|  58 +
 .../drm/amd/display/dc/fpu_operations/dcn2x.c | 104 
 .../drm/amd/display/dc/fpu_operations/dcn2x.h |  33 +
 drivers/gpu/drm/amd/display/dc/os_types.h |   6 +-
 13 files changed, 382 insertions(+), 43 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
 create mode 100644 

[PATCH v2] gpu: ipu-v3: use swap()

2021-07-13 Thread Salah Triki
Use swap() instead of implementing it since it makes code cleaner.

Signed-off-by: Salah Triki 
---

Changes since v1:
- Remove the declaration of tmp
- Fix typo in the description

 drivers/gpu/ipu-v3/ipu-image-convert.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index aa1d4b6d278f..af1612044eef 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -990,7 +990,7 @@ static int calc_tile_offsets_planar(struct 
ipu_image_convert_ctx *ctx,
const struct ipu_image_pixfmt *fmt = image->fmt;
unsigned int row, col, tile = 0;
u32 H, top, y_stride, uv_stride;
-   u32 uv_row_off, uv_col_off, uv_off, u_off, v_off, tmp;
+   u32 uv_row_off, uv_col_off, uv_off, u_off, v_off;
u32 y_row_off, y_col_off, y_off;
u32 y_size, uv_size;
 
@@ -1021,11 +1021,8 @@ static int calc_tile_offsets_planar(struct 
ipu_image_convert_ctx *ctx,
 
u_off = y_size - y_off + uv_off;
v_off = (fmt->uv_packed) ? 0 : u_off + uv_size;
-   if (fmt->uv_swapped) {
-   tmp = u_off;
-   u_off = v_off;
-   v_off = tmp;
-   }
+   if (fmt->uv_swapped)
+   swap(u_off, v_off);
 
image->tile[tile].offset = y_off;
image->tile[tile].u_off = u_off;
-- 
2.25.1



[PATCH] drm/fb-helper: Try to protect cleanup against delayed setup

2021-07-13 Thread Daniel Vetter
Some vague evidence suggests this can go wrong. Try to prevent it by
holding the right mutex and clearing ->deferred_setup to make sure we
later on don't accidentally try to re-register the fbdev when the
driver thought it had it all cleaned up already.

v2: I realized that this is fundamentally butchered, and CI complained
about lockdep splats. So limit the critical section again and just add
a few notes on what the proper fix is.

References: 
https://intel-gfx-ci.01.org/tree/linux-next/next-20201215/fi-byt-j1900/igt@i915_pm_...@module-reload.html
Signed-off-by: Daniel Vetter 
Cc: Ville Syrjälä 
Cc: Chris Wilson 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Cc: David Airlie 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/drm_fb_helper.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 9d82fda274eb..8f11e5abb222 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -598,6 +598,9 @@ EXPORT_SYMBOL(drm_fb_helper_alloc_fbi);
  * A wrapper around unregister_framebuffer, to release the fb_info
  * framebuffer device. This must be called before releasing all resources for
  * @fb_helper by calling drm_fb_helper_fini().
+ *
+ * Note that this is fundamentally racy on hotunload because it doesn't handle
+ * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead.
  */
 void drm_fb_helper_unregister_fbi(struct drm_fb_helper *fb_helper)
 {
@@ -611,6 +614,9 @@ EXPORT_SYMBOL(drm_fb_helper_unregister_fbi);
  * @fb_helper: driver-allocated fbdev helper, can be NULL
  *
  * This cleans up all remaining resources associated with @fb_helper.
+ *
+ * Note that this is fundamentally racy on hotunload because it doesn't handle
+ * open fbdev file descriptors at all. Use drm_fbdev_generic_setup() instead.
  */
 void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 {
@@ -2382,6 +2388,10 @@ static void drm_fbdev_client_unregister(struct 
drm_client_dev *client)
 {
struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client);
 
+   mutex_lock(&fb_helper->lock);
+   fb_helper->deferred_setup = false;
+   mutex_unlock(&fb_helper->lock);
+
if (fb_helper->fbdev)
/* drm_fbdev_fb_destroy() takes care of cleanup */
drm_fb_helper_unregister_fbi(fb_helper);
-- 
2.32.0



Re: [PATCH 1/2] drm: add crtc background color property

2021-07-13 Thread Harry Wentland



On 2021-07-13 3:52 a.m., Pekka Paalanen wrote:
> On Mon, 12 Jul 2021 12:15:59 -0400
> Harry Wentland  wrote:
> 
>> On 2021-07-12 4:03 a.m., Pekka Paalanen wrote:
>>> On Fri, 9 Jul 2021 18:23:26 +0200
>>> Raphael Gallais-Pou  wrote:
>>>   
 On 7/9/21 10:04 AM, Pekka Paalanen wrote:  
> On Wed, 7 Jul 2021 08:48:47 +
> Raphael GALLAIS-POU - foss  wrote:
>
>> Some display controllers can be programmed to present non-black colors
>> for pixels not covered by any plane (or pixels covered by the
>> transparent regions of higher planes).  Compositors that want a UI with
>> a solid color background can potentially save memory bandwidth by
>> setting the CRTC background property and using smaller planes to display
>> the rest of the content.
>>
>> To avoid confusion between different ways of encoding RGB data, we
>> define a standard 64-bit format that should be used for this property's
>> value.  Helper functions and macros are provided to generate and dissect
>> values in this standard format with varying component precision values.
>>
>> Signed-off-by: Raphael Gallais-Pou 
>> Signed-off-by: Matt Roper 
>> ---
>>   drivers/gpu/drm/drm_atomic_state_helper.c |  1 +
>>   drivers/gpu/drm/drm_atomic_uapi.c |  4 +++
>>   drivers/gpu/drm/drm_blend.c   | 34 +--
>>   drivers/gpu/drm/drm_mode_config.c |  6 
>>   include/drm/drm_blend.h   |  1 +
>>   include/drm/drm_crtc.h| 12 
>>   include/drm/drm_mode_config.h |  5 
>>   include/uapi/drm/drm_mode.h   | 28 +++
>>   8 files changed, 89 insertions(+), 2 deletions(-)
> 
> ...
> 
> The question about full vs. limited range seems unnecessary to me, as
> the background color will be used as-is in the blending stage, so
> userspace can just program the correct value that fits the pipeline it
> is setting up.
>
> One more question is, as HDR exists, could we need background colors
> with component values greater than 1.0?

 AR4H color format should cover that case, isn't it ?  
>>>
>>> Yes, but with the inconvenience I mentioned.
>>>
>>> This is a genuine question though, would anyone actually need
>>> background color values > 1.0. I don't know of any case yet where it
>>> would be required. It would imply that plane blending happens in a
>>> color space where >1.0 values are meaningful. I'm not even sure if any
>>> hardware supporting that exists.
>>>
>>> Maybe it would be best to assume that only [0.0, 1.0] pixel value range
>>> is useful, and mention in the commit message that if someone really
>>> needs values outside of that, they should create another background
>>> color property. Then, you can pick a simple unsigned integer pixel
>>> format, too. (I didn't see any 16 bit-per-channel formats like that in
>>> drm_fourcc.h though.)
>>>   
>>
>> I don't think we should artificially limit this to [0.0, 1.0]. As you
>> mentioned above when talking about full vs limited, the userspace
>> understands what's the correct value that fits the pipeline. If that
>> pipeline is FP16 with > 1.0 values then it would make sense that the
>> background color can be > 1.0.
> 
> Ok. The standard FP32 format then for ease of use and guaranteed enough
> range and precision for far into the future?
> 

I don't have a strong preference for FP16 vs FP32. My understanding is
that FP16 is enough to represent linearly encoded data in a way that
looks smooth to humans.

scRGB uses FP16 with linear encoding in a range of [-0.5, 7.4999].

> Or do you want to keep it in 64 bits total, so the UABI can pack
> everything into a u64 instead of needing to create a blob?
> 
> I don't mind as long as it's clearly documented what it is and how it
> works, and it carries enough precision.
> 
> But FP16 with its 10 bits of precision might be too little for integer
> 12-16 bpc pipelines and sinks?
> 
> If the values can go beyond [0.0, 1.0] range, then does the blending
> hardware and the degamma/ctm/gamma coming afterwards cope with them, or
> do they get clamped anyway?
> 

That probably depends on the HW and how it's configured. AMD HW can handle
values above and below [0.0, 1.0].
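
(For reference, the "pack everything into a u64" option discussed above
amounts to something like this hypothetical helper, with 16 unsigned bits
per channel; it is only an illustration, not the macros from the patch:

static inline uint64_t bg_argb16161616(uint16_t a, uint16_t r,
				       uint16_t g, uint16_t b)
{
	/* A in the top 16 bits, then R, G, B; needs <stdint.h>. */
	return ((uint64_t)a << 48) | ((uint64_t)r << 32) |
	       ((uint64_t)g << 16) |  (uint64_t)b;
}

An FP16-per-channel variant would pack four half-floats into the same 64
bits instead.)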

Harry

> 
> Thanks,
> pq
> 



Re: [PATCH] gpu: ipu-v3: use swap()

2021-07-13 Thread Fabio Estevam
Hi Salah,

On Tue, Jul 13, 2021 at 10:33 AM Salah Triki  wrote:
>
> Use swap() instead of implementing it since it makes code more clean.

s/more clean/cleaner

> Signed-off-by: Salah Triki 
> ---
>  drivers/gpu/ipu-v3/ipu-image-convert.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
> b/drivers/gpu/ipu-v3/ipu-image-convert.c
> index aa1d4b6d278f..5f730cd6009d 100644
> --- a/drivers/gpu/ipu-v3/ipu-image-convert.c
> +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
> @@ -1021,11 +1021,8 @@ static int calc_tile_offsets_planar(struct 
> ipu_image_convert_ctx *ctx,
>
> u_off = y_size - y_off + uv_off;
> v_off = (fmt->uv_packed) ? 0 : u_off + uv_size;
> -   if (fmt->uv_swapped) {
> -   tmp = u_off;
> -   u_off = v_off;
> -   v_off = tmp;

The 'tmp' variable seems to be unused now, so its declaration should be removed.

Thanks


Re: [PULL] drm-misc-fixes

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 10:44:05AM +0200, Thomas Zimmermann wrote:
> Hi Dave and Daniel,
> 
> these two fixes in drm-misc-fixes got lost during last cycle. Sending them
> now.

Applied to drm-fixes, thanks.
-Daniel

> 
> Best regards
> Thomas
> 
> drm-misc-fixes-2021-07-13:
> Short summary of fixes pull:
> 
>  * dma-buf: Fix fence leak in sync_file_merge() error code
>  * drm/panel: nt35510: Don't fail on DSI reads
> The following changes since commit d330099115597bbc238d6758a4930e72b49ea9ba:
> 
>   drm/nouveau: fix dma_address check for CPU/GPU sync (2021-06-24 15:40:44 
> +0200)
> 
> are available in the Git repository at:
> 
>   git://anongit.freedesktop.org/drm/drm-misc tags/drm-misc-fixes-2021-07-13
> 
> for you to fetch changes up to ffe000217c5068c5da07ccb1c0f8cce7ad767435:
> 
>   dma-buf/sync_file: Don't leak fences on merge failure (2021-07-12 13:34:49 
> +0200)
> 
> 
> Short summary of fixes pull:
> 
>  * dma-buf: Fix fence leak in sync_file_merge() error code
>  * drm/panel: nt35510: Don't fail on DSI reads
> 
> 
> Jason Ekstrand (1):
>   dma-buf/sync_file: Don't leak fences on merge failure
> 
> Linus Walleij (1):
>   drm/panel: nt35510: Do not fail if DSI read fails
> 
>  drivers/dma-buf/sync_file.c   | 13 +++--
>  drivers/gpu/drm/panel/panel-novatek-nt35510.c |  4 +---
>  2 files changed, 8 insertions(+), 9 deletions(-)
> 
> --
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer
> 
> -- 
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Felix Imendörffer

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH] drm/shmem-helper: Switch to vmf_insert_pfn

2021-07-13 Thread Daniel Vetter
On Thu, Jun 03, 2021 at 11:08:31PM +0200, Daniel Vetter wrote:
> We want to stop gup, which isn't the case if we use vmf_insert_page
> and VM_MIXEDMAP, because that does not set pte_special.
> 
> v2: With this shmem gem helpers now definitely need CONFIG_MMU (0day)
> 
> v3: add more depends on MMU. For usb drivers this is a bit awkward,
> but really it's correct: To be able to provide a contig mapping of
> buffers to userspace on !MMU platforms we'd need to use the cma
> helpers for these drivers on those platforms. As-is this won't work.
> 
> Also not exactly sure why vm_insert_page doesn't go boom, because that
> definitely won't fly in practice since the pages are non-contig to
> begin with.
> 
> Signed-off-by: Daniel Vetter 
> Cc: Maarten Lankhorst 
> Cc: Maxime Ripard 
> Cc: Thomas Zimmermann 
> Cc: David Airlie 
> Cc: Daniel Vetter 

Merged to drm-misc-next.

Aside, anyone feel like a review on the previous patch? Still not ready to
switch vgem over, but I think I've found the next bug that needs fixing in
shmem helpers.
-Daniel

> ---
>  drivers/gpu/drm/Kconfig| 2 +-
>  drivers/gpu/drm/drm_gem_shmem_helper.c | 4 ++--
>  drivers/gpu/drm/gud/Kconfig| 2 +-
>  drivers/gpu/drm/tiny/Kconfig   | 4 ++--
>  drivers/gpu/drm/udl/Kconfig| 1 +
>  5 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 56a55a6e6239..9c21527b791f 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -206,7 +206,7 @@ config DRM_KMS_CMA_HELPER
>  
>  config DRM_GEM_SHMEM_HELPER
>   bool
> - depends on DRM
> + depends on DRM && MMU
>   help
> Choose this if you need the GEM shmem helper functions
>  
> diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c 
> b/drivers/gpu/drm/drm_gem_shmem_helper.c
> index 6d625cee7a6a..11edd54f0580 100644
> --- a/drivers/gpu/drm/drm_gem_shmem_helper.c
> +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
> @@ -542,7 +542,7 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault 
> *vmf)
>   } else {
>   page = shmem->pages[page_offset];
>  
> - ret = vmf_insert_page(vma, vmf->address, page);
> + ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
>   }
>  
>   mutex_unlock(&shmem->pages_lock);
> @@ -612,7 +612,7 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct 
> vm_area_struct *vma)
>   return ret;
>   }
>  
> - vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
> + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND;
>   vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
>   if (shmem->map_wc)
>   vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
> diff --git a/drivers/gpu/drm/gud/Kconfig b/drivers/gpu/drm/gud/Kconfig
> index 1c8601bf4d91..9c1e61f9eec3 100644
> --- a/drivers/gpu/drm/gud/Kconfig
> +++ b/drivers/gpu/drm/gud/Kconfig
> @@ -2,7 +2,7 @@
>  
>  config DRM_GUD
>   tristate "GUD USB Display"
> - depends on DRM && USB
> + depends on DRM && USB && MMU
>   select LZ4_COMPRESS
>   select DRM_KMS_HELPER
>   select DRM_GEM_SHMEM_HELPER
> diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
> index d46f95d9196d..a15f57ace9e7 100644
> --- a/drivers/gpu/drm/tiny/Kconfig
> +++ b/drivers/gpu/drm/tiny/Kconfig
> @@ -31,7 +31,7 @@ config DRM_CIRRUS_QEMU
>  
>  config DRM_GM12U320
>   tristate "GM12U320 driver for USB projectors"
> - depends on DRM && USB
> + depends on DRM && USB && MMU
>   select DRM_KMS_HELPER
>   select DRM_GEM_SHMEM_HELPER
>   help
> @@ -40,7 +40,7 @@ config DRM_GM12U320
>  
>  config DRM_SIMPLEDRM
>   tristate "Simple framebuffer driver"
> - depends on DRM
> + depends on DRM && MMU
>   select DRM_GEM_SHMEM_HELPER
>   select DRM_KMS_HELPER
>   help
> diff --git a/drivers/gpu/drm/udl/Kconfig b/drivers/gpu/drm/udl/Kconfig
> index 1f497d8f1ae5..c744175c6992 100644
> --- a/drivers/gpu/drm/udl/Kconfig
> +++ b/drivers/gpu/drm/udl/Kconfig
> @@ -4,6 +4,7 @@ config DRM_UDL
>   depends on DRM
>   depends on USB
>   depends on USB_ARCH_HAS_HCD
> + depends on MMU
>   select DRM_GEM_SHMEM_HELPER
>   select DRM_KMS_HELPER
>   help
> -- 
> 2.31.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH v2] drm/dp: Move panel DP AUX backlight support to drm_dp_helper

2021-07-13 Thread Doug Anderson
Hi,

On Mon, Jul 12, 2021 at 8:02 AM Douglas Anderson  wrote:
>
> We were getting a depmod error:
>   depmod: ERROR: Cycle detected: drm_kms_helper -> drm -> drm_kms_helper
>
> It looks like the rule is that drm_kms_helper can call into drm, but
> drm can't call into drm_kms_helper. That means we've got to move the
> DP AUX backlight support into drm_dp_helper.
>
> NOTE: as part of this, I didn't try to do any renames of the main
> registration function. Even though it's in the drm_dp_helper, it still
> feels very parallel to drm_panel_of_backlight().
>
> Fixes: 10f7b40e4f30 ("drm/panel: add basic DP AUX backlight support")
> Reported-by: Ville Syrjälä 
> Reported-by: Thomas Zimmermann 
> Signed-off-by: Douglas Anderson 
> ---
> Note that I've compile tested this, but I don't have a device setup
> yet that uses this code. Since the code is basically the same as it
> was this should be OK, but if Rajeev could confirm that nothing is
> broken that'd be nice.
>
> Changes in v2:
> - Guard new functions by the proper configs.
>
>  drivers/gpu/drm/drm_dp_helper.c | 113 
>  drivers/gpu/drm/drm_panel.c | 108 --
>  include/drm/drm_dp_helper.h |  16 +
>  include/drm/drm_panel.h |   8 ---
>  4 files changed, 129 insertions(+), 116 deletions(-)

Pushed to drm-misc-next with Rajeev's review:

072ed3431f5b drm/dp: Move panel DP AUX backlight support to drm_dp_helper

-Doug


Re: [PATCH] drm/i915/gtt: drop the page table optimisation

2021-07-13 Thread Daniel Vetter
On Tue, Jul 13, 2021 at 02:04:31PM +0100, Matthew Auld wrote:
> We skip filling out the pt with scratch entries if the va range covers
> the entire pt, since we later have to fill it with the PTEs for the
> object pages anyway. However this might leave open a small window where
> the PTEs don't point to anything valid for the HW to consume.
> 
> When for example using 2M GTT pages this fill_px() showed up as being
> quite significant in perf measurements, and ends up being completely
> wasted since we ignore the pt and just use the pde directly.
> 
> Anyway, currently we have our PTE construction split between alloc and
> insert, which is probably slightly iffy nowadays, since the alloc
> doesn't actually allocate anything anymore, instead it just sets up the
> page directories and points the PTEs at the scratch page. Later when we
> do the insert step we re-program the PTEs again. Better might be to
> squash the alloc and insert into a single step, then bringing back this
> optimisation (along with some others) should be possible.
> 
> Fixes: 14826673247e ("drm/i915: Only initialize partially filled pagetables")
> Signed-off-by: Matthew Auld 
> Cc: Jon Bloomfield 
> Cc: Chris Wilson 
> Cc: Daniel Vetter 
> Cc:  # v4.15+

This is some impressively convoluted code, and I'm scared.

But as far as I managed to convince myself, your story here checks out.
Problem will be a bit that this code moved around a _lot_ so we'll need a
lot of dedicated backports :-(

Reviewed-by: Daniel Vetter 

> ---
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 3d02c726c746..6e0e52eeb87a 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -303,10 +303,7 @@ static void __gen8_ppgtt_alloc(struct i915_address_space 
> * const vm,
>   __i915_gem_object_pin_pages(pt->base);
>   i915_gem_object_make_unshrinkable(pt->base);
>  
> - if (lvl ||
> - gen8_pt_count(*start, end) < I915_PDES ||
> - intel_vgpu_active(vm->i915))
> - fill_px(pt, vm->scratch[lvl]->encode);
> + fill_px(pt, vm->scratch[lvl]->encode);
>  
>   spin_lock(&pd->lock);
>   if (likely(!pd->entry[idx])) {
> -- 
> 2.26.3
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


[PATCH] gpu: ipu-v3: use swap()

2021-07-13 Thread Salah Triki
Use swap() instead of implementing it since it makes code more clean.

Signed-off-by: Salah Triki 
---
 drivers/gpu/ipu-v3/ipu-image-convert.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
b/drivers/gpu/ipu-v3/ipu-image-convert.c
index aa1d4b6d278f..5f730cd6009d 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -1021,11 +1021,8 @@ static int calc_tile_offsets_planar(struct 
ipu_image_convert_ctx *ctx,
 
u_off = y_size - y_off + uv_off;
v_off = (fmt->uv_packed) ? 0 : u_off + uv_size;
-   if (fmt->uv_swapped) {
-   tmp = u_off;
-   u_off = v_off;
-   v_off = tmp;
-   }
+   if (fmt->uv_swapped)
+   swap(u_off, v_off);
 
image->tile[tile].offset = y_off;
image->tile[tile].u_off = u_off;
-- 
2.25.1



Re: [PATCH] use swap()

2021-07-13 Thread Alex Deucher
On Tue, Jul 13, 2021 at 3:09 AM Salah Triki  wrote:
>
> Use swap() instead of implementing it since it makes code more clean.
>
> Signed-off-by: Salah Triki 

Patches for this driver generally have the following prefix in the subject:
gpu: ipu-v3:

Alex

> ---
>  drivers/gpu/ipu-v3/ipu-image-convert.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c 
> b/drivers/gpu/ipu-v3/ipu-image-convert.c
> index aa1d4b6d278f..5f730cd6009d 100644
> --- a/drivers/gpu/ipu-v3/ipu-image-convert.c
> +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
> @@ -1021,11 +1021,8 @@ static int calc_tile_offsets_planar(struct 
> ipu_image_convert_ctx *ctx,
>
> u_off = y_size - y_off + uv_off;
> v_off = (fmt->uv_packed) ? 0 : u_off + uv_size;
> -   if (fmt->uv_swapped) {
> -   tmp = u_off;
> -   u_off = v_off;
> -   v_off = tmp;
> -   }
> +   if (fmt->uv_swapped)
> +   swap(u_off, v_off);
>
> image->tile[tile].offset = y_off;
> image->tile[tile].u_off = u_off;
> --
> 2.25.1
>


Re: [PATCH] dim/drm-misc: Add rule to not push patches with issues

2021-07-13 Thread Thomas Zimmermann



Am 09.07.21 um 10:11 schrieb Daniel Vetter:

We kinda left this out, and I like the wording from the drm-intel
side, so add that. Motivated by a discussion with Christian.


I always thought this goes without saying.



Cc: Christian König 
Cc: Maarten Lankhorst 
Cc: Maxime Ripard 
Cc: Thomas Zimmermann 
Signed-off-by: Daniel Vetter 


Acked-by: Thomas Zimmermann 


---
  committer-drm-misc.rst | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/committer-drm-misc.rst b/committer-drm-misc.rst
index 9497a5d26a9d..110ca8b0525e 100644
--- a/committer-drm-misc.rst
+++ b/committer-drm-misc.rst
@@ -21,6 +21,9 @@ Merge Criteria
  
  Right now the only hard merge criteria are:
  
+* There must not be open issues or unresolved or conflicting feedback from

+  anyone. Clear them up first. Defer to maintainers as needed.
+
  * Patch is properly reviewed or at least Ack, i.e. don't just push your own
stuff directly. This rule holds even more for bugfix patches - it would be
embarrassing if the bugfix contains a small gotcha that review would have



--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer



OpenPGP_signature
Description: OpenPGP digital signature


[PATCH] drm/i915/gtt: drop the page table optimisation

2021-07-13 Thread Matthew Auld
We skip filling out the pt with scratch entries if the va range covers
the entire pt, since we later have to fill it with the PTEs for the
object pages anyway. However this might leave open a small window where
the PTEs don't point to anything valid for the HW to consume.

When for example using 2M GTT pages this fill_px() showed up as being
quite significant in perf measurements, and ends up being completely
wasted since we ignore the pt and just use the pde directly.

Anyway, currently we have our PTE construction split between alloc and
insert, which is probably slightly iffy nowadays, since the alloc
doesn't actually allocate anything anymore, instead it just sets up the
page directories and points the PTEs at the scratch page. Later when we
do the insert step we re-program the PTEs again. Better might be to
squash the alloc and insert into a single step, then bringing back this
optimisation (along with some others) should be possible.

Fixes: 14826673247e ("drm/i915: Only initialize partially filled pagetables")
Signed-off-by: Matthew Auld 
Cc: Jon Bloomfield 
Cc: Chris Wilson 
Cc: Daniel Vetter 
Cc:  # v4.15+
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 3d02c726c746..6e0e52eeb87a 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -303,10 +303,7 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * 
const vm,
__i915_gem_object_pin_pages(pt->base);
i915_gem_object_make_unshrinkable(pt->base);
 
-   if (lvl ||
-   gen8_pt_count(*start, end) < I915_PDES ||
-   intel_vgpu_active(vm->i915))
-   fill_px(pt, vm->scratch[lvl]->encode);
+   fill_px(pt, vm->scratch[lvl]->encode);
 
spin_lock(&pd->lock);
if (likely(!pd->entry[idx])) {
-- 
2.26.3



Re: [Intel-gfx] [PATCH 1/5] drm/i915: document caching related bits

2021-07-13 Thread Mika Kuoppala
Matthew Auld  writes:

> Try to document the object caching related bits, like cache_coherent and
> cache_dirty.
>
> Suggested-by: Daniel Vetter 
> Signed-off-by: Matthew Auld 
> ---
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 135 +-
>  drivers/gpu/drm/i915/i915_drv.h   |   9 --
>  2 files changed, 131 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index ef3de2ae9723..02c3529b774c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -92,6 +92,57 @@ struct drm_i915_gem_object_ops {
>   const char *name; /* friendly name for debug, e.g. lockdep classes */
>  };
>  
> +/**
> + * enum i915_cache_level - The supported GTT caching values for system memory
> + * pages.
> + *
> + * These translate to some special GTT PTE bits when binding pages into some
> + * address space. It also determines whether an object, or rather its pages 
> are
> + * coherent with the GPU, when also reading or writing through the CPU cache
> + * with those pages.
> + *
> + * Userspace can also control this through struct drm_i915_gem_caching.
> + */
> +enum i915_cache_level {
> + /**
> +  * @I915_CACHE_NONE:
> +  *
> +  * Not coherent with the CPU cache. If the cache is dirty and we need
> +  * the underlying pages to be coherent with some later GPU access then
> +  * we need to manually flush the pages.
> +  *
> +  * Note that on shared-LLC platforms reads through the CPU cache are
> +  * still coherent even with this setting. See also
> +  * I915_BO_CACHE_COHERENT_FOR_READ for more details.
> +  */
> + I915_CACHE_NONE = 0,
> + /**
> +  * @I915_CACHE_LLC:
> +  *
> +  * Coherent with the CPU cache. If the cache is dirty, then the GPU will
> +  * ensure that access remains coherent, when both reading and writing
> +  * through the CPU cache.
> +  *
> +  * Applies to both platforms with shared-LLC(HAS_LLC), and snooping
> +  * based platforms(HAS_SNOOP).
> +  */
> + I915_CACHE_LLC,
> + /**
> +  * @I915_CACHE_L3_LLC:
> +  *
> +  * gen7+, L3 sits between the domain specifc caches, eg sampler/render

typo: specifc

> +  * caches, and the large Last-Level-Cache. LLC is coherent with the CPU,
> +  * but L3 is only visible to the GPU.
> +  */

I don't get the difference between this and I915_CACHE_LLC.
Could the difference between LLC and L3_LLC be described here with an example?

Thanks,
-Mika

> + I915_CACHE_L3_LLC,
> + /**
> +  * @I915_CACHE_WT:
> +  *
> +  * hsw:gt3e Write-through for scanout buffers.
> +  */
> + I915_CACHE_WT,
> +};
> +
>  enum i915_map_type {
>   I915_MAP_WB = 0,
>   I915_MAP_WC,
> @@ -228,14 +279,90 @@ struct drm_i915_gem_object {
>   unsigned int mem_flags;
>  #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
>  #define I915_BO_FLAG_IOMEM   BIT(1) /* Object backed by IO memory */
> - /*
> -  * Is the object to be mapped as read-only to the GPU
> -  * Only honoured if hardware has relevant pte bit
> + /**
> +  * @cache_level: The desired GTT caching level.
> +  *
> +  * See enum i915_cache_level for possible values, along with what
> +  * each does.
>*/
>   unsigned int cache_level:3;
> - unsigned int cache_coherent:2;
> + /**
> +  * @cache_coherent:
> +  *
> +  * Track whether the pages are coherent with the GPU if reading or
> +  * writing through the CPU cache.
> +  *
> +  * This largely depends on the @cache_level, for example if the object
> +  * is marked as I915_CACHE_LLC, then GPU access is coherent for both
> +  * reads and writes through the CPU cache.
> +  *
> +  * Note that on platforms with shared-LLC support(HAS_LLC) reads through
> +  * the CPU cache are always coherent, regardless of the @cache_level. On
> +  * snooping based platforms this is not the case, unless the full
> +  * I915_CACHE_LLC or similar setting is used.
> +  *
> +  * As a result of this we need to track coherency separately for reads
> +  * and writes, in order to avoid superfluous flushing on shared-LLC
> +  * platforms, for reads.
> +  *
> +  * I915_BO_CACHE_COHERENT_FOR_READ:
> +  *
> +  * When reading through the CPU cache, the GPU is still coherent. Note
> +  * that no data has actually been modified here, so it might seem
> +  * strange that we care about this.
> +  *
> +  * As an example, if some object is mapped on the CPU with write-back
> +  * caching, and we read some page, then the cache likely now contains
> +  * the data from that read. At this point the cache and main memory
> +  * match up, so all good. But next the GPU needs to write some data to
> +  * that same 

Re: [PATCH v4 02/18] drm/sched: Barriers are needed for entity->last_scheduled

2021-07-13 Thread Christian König

Am 13.07.21 um 11:10 schrieb Daniel Vetter:

On Tue, Jul 13, 2021 at 9:25 AM Christian König
 wrote:

Am 13.07.21 um 08:50 schrieb Daniel Vetter:

On Tue, Jul 13, 2021 at 8:35 AM Christian König
 wrote:

Am 12.07.21 um 19:53 schrieb Daniel Vetter:

It might be good enough on x86 with just READ_ONCE, but the write side
should then at least be WRITE_ONCE because x86 has total store order.

It's definitely not enough on arm.

Fix this properly, which means
- explain the need for the barrier in both places
- point at the other side in each comment

Also pull out the !sched_list case as the first check, so that the
code flow is clearer.

While at it sprinkle some comments around because it was very
non-obvious to me what's actually going on here and why.

Note that we really need full barriers here, at first I thought
store-release and load-acquire on ->last_scheduled would be enough,
but we actually require ordering between that and the queue state.

v2: Put smp_rmp() in the right place and fix up comment (Andrey)

Signed-off-by: Daniel Vetter 
Cc: "Christian König" 
Cc: Steven Price 
Cc: Daniel Vetter 
Cc: Andrey Grodzovsky 
Cc: Lee Jones 
Cc: Boris Brezillon 
---
drivers/gpu/drm/scheduler/sched_entity.c | 27 ++--
1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index f7347c284886..89e3f6eaf519 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -439,8 +439,16 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct 
drm_sched_entity *entity)
dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);

dma_fence_put(entity->last_scheduled);
+
entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);

+ /*
+  * If the queue is empty we allow drm_sched_entity_select_rq() to
+  * locklessly access ->last_scheduled. This only works if we set the
+  * pointer before we dequeue and if we a write barrier here.
+  * pointer before we dequeue and if we have a write barrier here.
+ smp_wmb();
+

Again, conceptual those barriers should be part of the spsc_queue
container and not externally.

That would be extremely unusual api. Let's assume that your queue is
very dumb, and protected by a simple lock. That's about the maximum
any user could expect.

But then you still need barriers here, because linux locks (spinlock,
mutex) are defined to be one-way barriers: Stuff that's inside is
guaranteed to be done insinde, but stuff outside of the locked region
can leak in. They're load-acquire/store-release barriers. So not good
enough.

You really need to have barriers here, and they really all need to be
documented properly. And yes that's a shit-ton of work in drm/sched,
because it's full of yolo lockless stuff.

The other case you could make is that this works like a wakeup queue,
or similar. The rules there are:
- wake_up (i.e. pushing something into the queue) is a store-release barrier
- the woken-up side (i.e. popping an entry) is a load acquire barrier
Which is obviously needed because otherwise you don't have coherency
for the data queued up. And again not the barriers you're looking for
here.
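
A minimal sketch of that pairing, in kernel terms (illustrative only, not
drm/sched code; smp_store_release()/smp_load_acquire() come from
<asm/barrier.h>):

static int payload;
static int ready;

static void producer(void)
{
	payload = 42;			/* data written first */
	smp_store_release(&ready, 1);	/* publish: orders the data before the flag */
}

static int consumer(void)
{
	if (smp_load_acquire(&ready))	/* pairs with the store-release above */
		return payload;		/* guaranteed to observe 42 */
	return -1;
}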

Exactly that was the idea, yes.


Either way, we'd still need the comments, because it's still lockless
trickery, and every single one of that needs to have a comment on both
sides to explain what's going on.

Essentially replace spsc_queue with an llist underneath, and that's
the amount of barriers a data structure should provide. Anything else
is asking your datastructure to paper over bugs in your users.

This is similar to how atomic_t is by default completely unordered,
and users need to add barriers as needed, with comments.

My main problem is, as always, that kernel atomics work differently than
userspace atomics.


I think this is all to make sure people don't just write lockless algorithms
because it's a cool idea, but are forced to think this all through.
Which seems to not have happened very consistently for drm/sched, so I
guess it needs to be fixed.

Well at least initially that was all perfectly thought through. The
problem is nobody is really maintaining that stuff.


I'm definitely not going to hide all that by making the spsc_queue
stuff provide random unjustified barriers just because that would
paper over drm/sched bugs. We need to fix the actual bugs, and
preferrable all of them. I've found a few, but I wasn't involved in
drm/sched thus far, so best I can do is discover them as we go.

I don't think that those are random unjustified barriers at all and it
sounds like you didn't grasp what I said here.

See the spsc queue must have the following semantics:

1. When you pop a job all changes made before you push the job must be
visible.

These are the standard barriers that wake-up queues also have, it's just
store-release+load-acquire.


2. When the queue becomes empty all the changes made before you pop the
last job must be visible.

This is 

Re: [PATCH 1/1] fbmem: Do not delete the mode that is still in use

2021-07-13 Thread Daniel Vetter
On Mon, Jul 12, 2021 at 4:37 PM Daniel Vetter  wrote:
> On Mon, Jul 12, 2021 at 04:55:44PM +0800, Zhen Lei wrote:
> > The execution of fb_delete_videomode() is not based on the result of the
> > previous fbcon_mode_deleted(). As a result, the mode is directly deleted,
> > regardless of whether it is still in use, which may cause UAF.
> >
> > ==
> > BUG: KASAN: use-after-free in fb_mode_is_equal+0x36e/0x5e0 \
> > drivers/video/fbdev/core/modedb.c:924
> > Read of size 4 at addr 88807e0ddb1c by task syz-executor.0/18962
> >
> > CPU: 2 PID: 18962 Comm: syz-executor.0 Not tainted 5.10.45-rc1+ #3
> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ...
> > Call Trace:
> >  __dump_stack lib/dump_stack.c:77 [inline]
> >  dump_stack+0x137/0x1be lib/dump_stack.c:118
> >  print_address_description+0x6c/0x640 mm/kasan/report.c:385
> >  __kasan_report mm/kasan/report.c:545 [inline]
> >  kasan_report+0x13d/0x1e0 mm/kasan/report.c:562
> >  fb_mode_is_equal+0x36e/0x5e0 drivers/video/fbdev/core/modedb.c:924
> >  fbcon_mode_deleted+0x16a/0x220 drivers/video/fbdev/core/fbcon.c:2746
> >  fb_set_var+0x1e1/0xdb0 drivers/video/fbdev/core/fbmem.c:975
> >  do_fb_ioctl+0x4d9/0x6e0 drivers/video/fbdev/core/fbmem.c:1108
> >  vfs_ioctl fs/ioctl.c:48 [inline]
> >  __do_sys_ioctl fs/ioctl.c:753 [inline]
> >  __se_sys_ioctl+0xfb/0x170 fs/ioctl.c:739
> >  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
> >  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> >
> > Freed by task 18960:
> >  kasan_save_stack mm/kasan/common.c:48 [inline]
> >  kasan_set_track+0x3d/0x70 mm/kasan/common.c:56
> >  kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355
> >  __kasan_slab_free+0x108/0x140 mm/kasan/common.c:422
> >  slab_free_hook mm/slub.c:1541 [inline]
> >  slab_free_freelist_hook+0xd6/0x1a0 mm/slub.c:1574
> >  slab_free mm/slub.c:3139 [inline]
> >  kfree+0xca/0x3d0 mm/slub.c:4121
> >  fb_delete_videomode+0x56a/0x820 drivers/video/fbdev/core/modedb.c:1104
> >  fb_set_var+0x1f3/0xdb0 drivers/video/fbdev/core/fbmem.c:978
> >  do_fb_ioctl+0x4d9/0x6e0 drivers/video/fbdev/core/fbmem.c:1108
> >  vfs_ioctl fs/ioctl.c:48 [inline]
> >  __do_sys_ioctl fs/ioctl.c:753 [inline]
> >  __se_sys_ioctl+0xfb/0x170 fs/ioctl.c:739
> >  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
> >  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> >
> > Fixes: 13ff178ccd6d ("fbcon: Call fbcon_mode_deleted/new_modelist directly")
> > Signed-off-by: Zhen Lei 
>
> Nice catch, that indeed got lost.
>
> Reviewed-by: Daniel Vetter 
> Cc:  # v5.3+
>
> Needs to be applied to drm-misc-fixes, but the tree isn't ready yet.

Tree still isn't ready, adding Thomas.

Thomas, can you pls apply this when drm-misc-fixes is forwarded?

Thanks, Daniel

> -Daniel
>
> > ---
> >  drivers/video/fbdev/core/fbmem.c | 12 +---
> >  1 file changed, 5 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/video/fbdev/core/fbmem.c 
> > b/drivers/video/fbdev/core/fbmem.c
> > index 98f193078c05..1c855145711b 100644
> > --- a/drivers/video/fbdev/core/fbmem.c
> > +++ b/drivers/video/fbdev/core/fbmem.c
> > @@ -970,13 +970,11 @@ fb_set_var(struct fb_info *info, struct 
> > fb_var_screeninfo *var)
> >   fb_var_to_videomode(&mode2, &info->var);
> >   /* make sure we don't delete the videomode of current var */
> >   ret = fb_mode_is_equal(&mode1, &mode2);
> > -
> > - if (!ret)
> > - fbcon_mode_deleted(info, &mode1);
> > -
> > - if (!ret)
> > - fb_delete_videomode(&mode1, &info->modelist);
> > -
> > + if (!ret) {
> > + ret = fbcon_mode_deleted(info, &mode1);
> > + if (!ret)
> > + fb_delete_videomode(&mode1, &info->modelist);
> > + }
> >
> >   return ret ? -EINVAL : 0;
> >   }
> > --
> > 2.25.1
> >
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


[PATCH 4/5] drm/i915: pull in some more kernel-doc

2021-07-13 Thread Matthew Auld
Pull in the kernel-doc for drm_i915_gem_object.

Signed-off-by: Matthew Auld 
Cc: Daniel Vetter 
---
 Documentation/gpu/i915.rst | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 204ebdaadb45..77558084e989 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -387,6 +387,13 @@ GEM BO Management Implementation Details
 .. kernel-doc:: drivers/gpu/drm/i915/i915_vma_types.h
:doc: Virtual Memory Address
 
+GEM Buffer Object
+-
+This section documents our core GEM object, and related bits.
+
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+   :internal:
+
 Buffer Object Eviction
 --
 
-- 
2.26.3



[PATCH 2/5] drm/i915/uapi: convert drm_i915_gem_madvise to kernel-doc

2021-07-13 Thread Matthew Auld
Add some kernel doc for this. We can then just reference this later when
documenting madv in the kernel.
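
For context, a hypothetical userspace caller of the ioctl being documented
could look like this (sketch only; the handle management around it is the
application's business, and the include paths assume libdrm's pkg-config
cflags):

#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static void drop_backing_store(int fd, uint32_t bo_handle)
{
	struct drm_i915_gem_madvise madv = {
		.handle = bo_handle,
		.madv = I915_MADV_DONTNEED,
	};

	/* Tell the kernel the pages may be discarded under memory pressure. */
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0 &&
	    !madv.retained) {
		/* Object was purged earlier; further page allocations for it
		 * will be rejected, so a new object is needed. */
	}
}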

Signed-off-by: Matthew Auld 
Cc: Daniel Vetter 
---
 include/uapi/drm/i915_drm.h | 50 +++--
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e334a8b14ef2..a839085b6577 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1492,20 +1492,54 @@ struct drm_i915_get_pipe_from_crtc_id {
__u32 pipe;
 };
 
-#define I915_MADV_WILLNEED 0
-#define I915_MADV_DONTNEED 1
-#define __I915_MADV_PURGED 2 /* internal state */
-
+/**
+ * struct drm_i915_gem_madvise - Update the madvise hint for the object.
+ *
+ * The kernel uses this to know when it can safely discard the backing pages 
for
+ * an object, when under memory pressure.
+ */
 struct drm_i915_gem_madvise {
-   /** Handle of the buffer to change the backing store advice */
+   /**
+* @handle: Handle of the buffer to change the backing store advice for.
+*/
__u32 handle;
 
-   /* Advice: either the buffer will be needed again in the near future,
-* or wont be and could be discarded under memory pressure.
+   /**
+* @madv: The madvise hint to set for the object.
+*
+* Supported values:
+*
+* I915_MADV_WILLNEED:
+*
+* The buffer will be needed again in the near future. By default all
+* objects are set as I915_MADV_WILLNEED. Once the pages become
+* dirty, the kernel is no longer allowed to simply discard the pages,
+* and instead can only resort to swapping the pages out, if under
+* memory pressure, where the page contents must persist when swapping
+* the pages back in.
+*
+* I915_MADV_DONTNEED:
+*
+* The buffer won't be needed. The pages and their contents can be
+* discarded under memory pressure.
+*
+* Note that if the pages were discarded then the kernel updates the
+* internal madvise value of the object to __I915_MADV_PURGED, which
+* effectively kills the object, since all further requests to allocate
+* pages for the object will be rejected. At this point a new object is
+* needed. This will be reflected in @retained.
 */
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
__u32 madv;
 
-   /** Whether the backing store still exists. */
+   /**
+* @retained: Whether the backing store still exists.
+*
+* Set to false if the kernel purged the object and marked the object as
+* __I915_MADV_PURGED.
+*/
__u32 retained;
 };
 
-- 
2.26.3



[PATCH 1/5] drm/i915: document caching related bits

2021-07-13 Thread Matthew Auld
Try to document the object caching related bits, like cache_coherent and
cache_dirty.

Suggested-by: Daniel Vetter 
Signed-off-by: Matthew Auld 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 135 +-
 drivers/gpu/drm/i915/i915_drv.h   |   9 --
 2 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index ef3de2ae9723..02c3529b774c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -92,6 +92,57 @@ struct drm_i915_gem_object_ops {
const char *name; /* friendly name for debug, e.g. lockdep classes */
 };
 
+/**
+ * enum i915_cache_level - The supported GTT caching values for system memory
+ * pages.
+ *
+ * These translate to some special GTT PTE bits when binding pages into some
+ * address space. It also determines whether an object, or rather its pages are
+ * coherent with the GPU, when also reading or writing through the CPU cache
+ * with those pages.
+ *
+ * Userspace can also control this through struct drm_i915_gem_caching.
+ */
+enum i915_cache_level {
+   /**
+* @I915_CACHE_NONE:
+*
+* Not coherent with the CPU cache. If the cache is dirty and we need
+* the underlying pages to be coherent with some later GPU access then
+* we need to manually flush the pages.
+*
+* Note that on shared-LLC platforms reads through the CPU cache are
+* still coherent even with this setting. See also
+* I915_BO_CACHE_COHERENT_FOR_READ for more details.
+*/
+   I915_CACHE_NONE = 0,
+   /**
+* @I915_CACHE_LLC:
+*
+* Coherent with the CPU cache. If the cache is dirty, then the GPU will
+* ensure that access remains coherent, when both reading and writing
+* through the CPU cache.
+*
+* Applies to both platforms with shared-LLC(HAS_LLC), and snooping
+* based platforms(HAS_SNOOP).
+*/
+   I915_CACHE_LLC,
+   /**
+* @I915_CACHE_L3_LLC:
+*
+* gen7+, L3 sits between the domain specifc caches, eg sampler/render
+* caches, and the large Last-Level-Cache. LLC is coherent with the CPU,
+* but L3 is only visible to the GPU.
+*/
+   I915_CACHE_L3_LLC,
+   /**
+* @I915_CACHE_WT:
+*
+* hsw:gt3e Write-through for scanout buffers.
+*/
+   I915_CACHE_WT,
+};
+
 enum i915_map_type {
I915_MAP_WB = 0,
I915_MAP_WC,
@@ -228,14 +279,90 @@ struct drm_i915_gem_object {
unsigned int mem_flags;
 #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
 #define I915_BO_FLAG_IOMEM   BIT(1) /* Object backed by IO memory */
-   /*
-* Is the object to be mapped as read-only to the GPU
-* Only honoured if hardware has relevant pte bit
+   /**
+* @cache_level: The desired GTT caching level.
+*
+* See enum i915_cache_level for possible values, along with what
+* each does.
 */
unsigned int cache_level:3;
-   unsigned int cache_coherent:2;
+   /**
+* @cache_coherent:
+*
+* Track whether the pages are coherent with the GPU if reading or
+* writing through the CPU cache.
+*
+* This largely depends on the @cache_level, for example if the object
+* is marked as I915_CACHE_LLC, then GPU access is coherent for both
+* reads and writes through the CPU cache.
+*
+* Note that on platforms with shared-LLC support (HAS_LLC) reads through
+* the CPU cache are always coherent, regardless of the @cache_level. On
+* snooping-based platforms this is not the case, unless the full
+* I915_CACHE_LLC or similar setting is used.
+*
+* As a result of this we need to track coherency separately for reads
+* and writes, in order to avoid superfluous flushing on shared-LLC
+* platforms, for reads.
+*
+* I915_BO_CACHE_COHERENT_FOR_READ:
+*
+* When reading through the CPU cache, the GPU is still coherent. Note
+* that no data has actually been modified here, so it might seem
+* strange that we care about this.
+*
+* As an example, if some object is mapped on the CPU with write-back
+* caching, and we read some page, then the cache likely now contains
+* the data from that read. At this point the cache and main memory
+* match up, so all good. But next the GPU needs to write some data to
+* that same page. Now if the @cache_level is I915_CACHE_NONE and the
+* platform doesn't have the shared-LLC, then the GPU will
+* effectively skip invalidating the cache (or however that works
+* internally) when writing the new value.  This 
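
As a sketch of how the read/write split above is typically consumed: before the GPU writes to pages that are not write-coherent, the CPU cache gets flushed so it cannot be left holding stale lines afterwards. The helper below is illustrative rather than the actual i915 call site; only the flag name and drm_clflush_sg() are real.

#include <drm/drm_cache.h>

/*
 * Illustrative only: flush the CPU cache before a GPU write when the object's
 * pages are not coherent for GPU writes (e.g. I915_CACHE_NONE without LLC).
 */
static void prepare_for_gpu_write(struct drm_i915_gem_object *obj,
                                  struct sg_table *pages)
{
        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
                return;                 /* GPU writes already coherent with the CPU cache */

        drm_clflush_sg(pages);          /* clean and invalidate the covering CPU cachelines */
        obj->cache_dirty = false;       /* nothing left to flush */
}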

[PATCH 5/5] drm/i915/ehl: unconditionally flush the pages on acquire

2021-07-13 Thread Matthew Auld
EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
possible for userspace to bypass the GTT caching bits set by the kernel,
as per the given object cache_level. This is troublesome since the heavy
flush we apply when first acquiring the pages is skipped if the kernel
thinks the object is coherent with the GPU. As a result it might be
possible to bypass the cache and read the contents of the page directly,
which could be stale data. If it's just a case of userspace shooting
themselves in the foot then so be it, but since i915 takes the stance of
always zeroing memory before handing it to userspace, we need to prevent
this.

v2: this time actually set cache_dirty in put_pages()
v3: move to get_pages() which looks simpler

BSpec: 34007
References: 046091758b50 ("Revert "drm/i915/ehl: Update MOCS table for EHL"")
Signed-off-by: Matthew Auld 
Cc: Tejas Upadhyay 
Cc: Francisco Jerez 
Cc: Lucas De Marchi 
Cc: Jon Bloomfield 
Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Daniel Vetter 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h   |  6 ++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 18 ++
 2 files changed, 24 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index da2194290436..7089d1b222c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -522,6 +522,12 @@ struct drm_i915_gem_object {
 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e. that the GPU will be coherent
 * for both reads and writes through the CPU cache. So pretty much this
 * should only be needed for I915_CACHE_NONE objects.
+*
+* Update: Some bonkers hardware decided to add the 'Bypass LLC' MOCS
+* entry, which defeats our @cache_coherent tracking, since userspace
+* can freely bypass the CPU cache when touching the pages with the GPU,
+* where the kernel is completely unaware. On such platform we need
+* apply the sledgehammer-on-acquire regardless of the @cache_coherent.
 */
unsigned int cache_dirty:1;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 6a04cce188fc..11f072193f3b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -182,6 +182,24 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj, st);
 
+   /*
+* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
+* possible for userspace to bypass the GTT caching bits set by the
+* kernel, as per the given object cache_level. This is troublesome
+* since the heavy flush we apply when first gathering the pages is
+* skipped if the kernel thinks the object is coherent with the GPU. As
+* a result it might be possible to bypass the cache and read the
+* contents of the page directly, which could be stale data. If it's
+* just a case of userspace shooting themselves in the foot then so be
+* it, but since i915 takes the stance of always zeroing memory before
+* handing it to userspace, we need to prevent this.
+*
+* By setting cache_dirty here we make the clflush in set_pages
+* unconditional on such platforms.
+*/
+   if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
+   obj->cache_dirty = true;
+
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
return 0;
-- 
2.26.3
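
For context, the clflush the new comment refers to lives on the set_pages/flush side and is keyed purely off @cache_dirty; conceptually it amounts to the sketch below (names approximate, not the exact i915 function). Forcing cache_dirty to true for I915_BO_ALLOC_USER objects on JSL/EHL therefore guarantees the heavy flush even when @cache_coherent would normally let it be skipped.

/* Sketch: a set cache_dirty bit makes the flush-on-acquire unconditional. */
static void flush_acquired_pages(struct drm_i915_gem_object *obj,
                                 struct sg_table *pages)
{
        if (!obj->cache_dirty)
                return;                 /* pages trusted to be coherent, heavy flush skipped */

        drm_clflush_sg(pages);          /* wipe any stale CPU cachelines covering these pages */
        obj->cache_dirty = false;
}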



[PATCH 3/5] drm/i915: convert drm_i915_gem_object to kernel-doc

2021-07-13 Thread Matthew Auld
Before we can pull in the previous kernel doc for the caching bits, we
first get to add kernel doc for all of drm_i915_gem_object so this
actually builds.

Signed-off-by: Matthew Auld 
Cc: Daniel Vetter 
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 422 +++---
 1 file changed, 366 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 02c3529b774c..da2194290436 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -174,24 +174,75 @@ struct i915_gem_object_page_iter {
struct mutex lock; /* protects this cache */
 };
 
-struct drm_i915_gem_object {
-   /*
-* We might have reason to revisit the below since it wastes
-* a lot of space for non-ttm gem objects.
-* In any case, always use the accessors for the ttm_buffer_object
-* when accessing it.
+/**
+ * struct i915_page_sizes - The page-size state we need to track in order
+ * to construct huge GTT entries when binding the object.
+ */
+struct i915_page_sizes {
+   /**
+* @phys:
+*
+* The sg mask of the pages sg_table, i.e. the
+* mask of the lengths for each sg entry.
 */
+   unsigned int phys;
+
+   /**
+* @sg:
+*
+* The gtt page sizes we are allowed to use given
+* the sg mask and the supported page sizes. This will
+* express the smallest unit we can use for the whole
+* object, as well as the larger sizes we may be able to
+* use opportunistically.
+*/
+   unsigned int sg;
+
+   /**
+* @gtt:
+*
+* The actual gtt page size usage. Since we can
+* have multiple vma associated with this object we need
+* to prevent any trampling of state, hence a copy of
+* this struct also lives in each vma; the gtt value
+* here should therefore only be read/written through the vma.
+*/
+   unsigned int gtt;
+};
+
+/**
+ * struct drm_i915_gem_object - Our core GEM object which extends the base
+ * struct drm_gem_object behaviour.
+ */
+struct drm_i915_gem_object {
union {
+   /** @base: The base DRM GEM object. */
struct drm_gem_object base;
+
+   /**
+* @__do_not_access:
+*
+* The base TTM object, if we are using the TTM backend. Note
+* that this also embeds its own DRM_GEM base object.
+*
+* We might have reason to revisit the below since it wastes a
+* lot of space for non-ttm gem objects.  In any case, always
+* use the accessors for the ttm_buffer_object when accessing
+* it.
+*/
struct ttm_buffer_object __do_not_access;
};
 
+   /**
+* @ops: The struct drm_i915_gem_object_ops interface implemented by the
+* object instance.
+*/
const struct drm_i915_gem_object_ops *ops;
 
+   /** @vma: Track all the struct i915_vma instances for this object. */
struct {
-   /**
-* @vma.lock: protect the list/tree of vmas
-*/
+   /** @vma.lock: protect the list/tree of vmas */
spinlock_t lock;
 
/**
@@ -224,7 +275,9 @@ struct drm_i915_gem_object {
 * this translation from object to context->handles_vma.
 */
struct list_head lut_list;
-   spinlock_t lut_lock; /* guards lut_list */
+
+   /** @lut_lock: Guards the lut_list */
+   spinlock_t lut_lock;
 
/**
 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
@@ -234,29 +287,123 @@ struct drm_i915_gem_object {
 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
 */
struct list_head obj_link;
-   /**
-* @shared_resv_from: The object shares the resv from this vm.
-*/
+
+   /** @shares_resv_from: The object shares the resv from this vm. */
struct i915_address_space *shares_resv_from;
 
union {
+   /** @rcu: Embedded rcu_head */
struct rcu_head rcu;
+
+   /**
+* @freed:
+*
+* When objects need to be destroyed we batch them together into
+* an llist, for a separate worker thread to then pick up and
+* process.
+*/
struct llist_node freed;
};
 
/**
-* Whether the object is currently in the GGTT mmap.
+* @userfault_count: Whether the object is currently in the GGTT mmap.
 */
unsigned int userfault_count;
+   /**
+* @userfault_link:
+*
+* We need to maintain the list of all objects which might have been
+

Re: [PATCH v2 1/2] dt-bindings: display: rockchip: Add compatible for rk3568 HDMI

2021-07-13 Thread Robin Murphy

On 2021-07-07 13:03, Benjamin Gaignard wrote:

Define a new compatible for rk3568 HDMI.
This version of HDMI hardware block needs two new clocks hclk_vio and hclk
to provide phy reference clocks.


Do you know what hclk_vio is and whether it's actually necessary? I 
don't see any mention of it downstream, and based on previous experience 
I'm suspicious that it might be just the parent of hclk, and thus should 
not need to be explicitly claimed by the device or baked into its binding.


Robin.


Signed-off-by: Benjamin Gaignard 
---
version 2:
- Add the clocks needed for the phy.

  .../bindings/display/rockchip/rockchip,dw-hdmi.yaml | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git 
a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml 
b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
index 75cd9c686e985..cb8643b3a8b84 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
@@ -23,6 +23,7 @@ properties:
- rockchip,rk3288-dw-hdmi
- rockchip,rk3328-dw-hdmi
- rockchip,rk3399-dw-hdmi
+  - rockchip,rk3568-dw-hdmi
  
reg-io-width:

  const: 4
@@ -51,8 +52,11 @@ properties:
- vpll
- enum:
- grf
+  - hclk_vio
+  - vpll
+  - enum:
+  - hclk
- vpll
-  - const: vpll
  
ddc-i2c-bus:

  $ref: /schemas/types.yaml#/definitions/phandle
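
If hclk_vio does turn out to be just an ancestor of hclk, the glue driver should not need to touch it at all; and if it stays in the binding, claiming it as optional would keep older device trees working. The snippet below is a sketch only, with assumed field names on the Rockchip dw-hdmi glue structure.

	/* Sketch: tolerate a missing "hclk_vio" clock instead of hard-requiring it. */
	hdmi->hclk_vio = devm_clk_get_optional(hdmi->dev, "hclk_vio");
	if (IS_ERR(hdmi->hclk_vio))
		return dev_err_probe(hdmi->dev, PTR_ERR(hdmi->hclk_vio),
				     "failed to get hclk_vio clock\n");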



Re: [PATCH] drm/msm/dsi: add support for dsi test pattern generator

2021-07-13 Thread Marijn Suijten




On 6/29/21 9:04 PM, Abhinav Kumar wrote:

During board bring-ups it's useful to have a DSI test pattern
generator to isolate a DPU vs a DSI issue and focus on the relevant
hardware block.

To facilitate this, add an API which triggers the DSI controller
test pattern. The expected output is a rectangular checkered pattern.

This has been validated on a single DSI video mode panel by calling it
right after drm_panel_enable() which is also the ideal location to use
this as the DSI host and the panel have been initialized by then.

Further validation on dual DSI and command mode panel is pending.
If there are any fix ups needed for those, it shall be applied on top
of this change.

Signed-off-by: Abhinav Kumar 
Reviewed-by: Dmitry Baryshkov 



Tested-by: Marijn Suijten 

[...]

+static void msm_dsi_host_cmd_test_pattern_setup(struct msm_dsi_host *msm_host)
+{
+   u32 reg;
+
+   reg = dsi_read(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL);
+
+   dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CMD_MDP_INIT_VAL0, 0xff);
+
+   reg |= (0x3 << 0x8);
+   dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL, reg);
+   /* draw checkered rectangle pattern */
+   dsi_write(msm_host, REG_DSI_TPG_MAIN_CONTROL2, (0x1 << 0x7));



How about BIT(7)?
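
That is, keeping the register define from the patch, the write would become:

	/* Same value as (0x1 << 0x7), but the single-bit intent is explicit. */
	dsi_write(msm_host, REG_DSI_TPG_MAIN_CONTROL2, BIT(7));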

On SM6125 this seems to change the color intensity of the pattern; it is 
always colored lines, a few pixels wide, alternating R, B and G from 
left to right.  Is it possible to document the meaning and available 
values of these registers, especially if they differ between SoC / DSI 
block?


Kind regards,
Marijn


+   DBG("Cmd test pattern setup done\n");
+}

[...]


Re: [Intel-gfx] [PATCH v5] drm/i915: Be more gentle with exiting non-persistent context

2021-07-13 Thread Tvrtko Ursulin



Ping for any reviewers? This fixes a customer issue on heavily loaded 
transcode boxes by avoiding false GPU hang reports upon pressing Ctrl-C.


Regards,

Tvrtko

On 16/06/2021 11:09, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

When a non-persistent context exits we currently mark it as banned in
order to trigger fast termination of any outstanding GPU jobs it may have
left running.

In doing so we apply a very strict 1ms limit in which the leftover job
has to preempt before we issue an engine reset.

Some workloads are not able to cleanly preempt in that time window and it
can be argued that it would instead be better to give them a bit more
grace since avoiding engine resets is generally preferable.

To achieve this the patch splits handling of banned contexts from simply
exited non-persistent ones and then applies different timeouts for both
and also extends the criteria which determines if a request should be
scheduled back in after preemption or not.

A 15ms preempt timeout grace is given to exited non-persistent contexts,
which has been empirically tested to satisfy customers' requirements
and still provides reasonably quick cleanup post exit.

v2:
  * Streamline fast path checks.

v3:
  * Simplify by using only schedulable status.
  * Increase timeout to 20ms.

v4:
  * Fix live_execlists selftest.

v5:
  * Fix logic in kill_engines.

Signed-off-by: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Zhen Han 
---
  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 22 +--
  drivers/gpu/drm/i915/gt/intel_context.c   |  2 ++
  drivers/gpu/drm/i915/gt/intel_context.h   | 17 +-
  drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
  .../drm/i915/gt/intel_execlists_submission.c  | 11 --
  drivers/gpu/drm/i915/gt/selftest_execlists.c  | 20 +++--
  drivers/gpu/drm/i915/i915_request.c   |  2 +-
  7 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7720b8c22c81..6289d82d55d1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct 
intel_context *ce)
return engine;
  }
  
-static void kill_engines(struct i915_gem_engines *engines, bool ban)

+static void
+kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
  {
struct i915_gem_engines_iter it;
struct intel_context *ce;
@@ -440,8 +441,15 @@ static void kill_engines(struct i915_gem_engines *engines, 
bool ban)
 */
for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine;
+   bool skip = false;
  
-		if (ban && intel_context_set_banned(ce))

+   if (ban)
+   skip = intel_context_set_banned(ce);
+   else if (!persistent)
+   skip = !intel_context_clear_schedulable(ce);
+
+   /* Already previously banned or made non-schedulable? */
+   if (skip)
continue;
  
  		/*

@@ -454,7 +462,7 @@ static void kill_engines(struct i915_gem_engines *engines, 
bool ban)
engine = active_engine(ce);
  
  		/* First attempt to gracefully cancel the context */

-   if (engine && !__cancel_engine(engine) && ban)
+   if (engine && !__cancel_engine(engine) && (ban || !persistent))
/*
 * If we are unable to send a preemptive pulse to bump
 * the context from the GPU, we have to resort to a full
@@ -466,8 +474,6 @@ static void kill_engines(struct i915_gem_engines *engines, 
bool ban)
  
  static void kill_context(struct i915_gem_context *ctx)

  {
-   bool ban = (!i915_gem_context_is_persistent(ctx) ||
-   !ctx->i915->params.enable_hangcheck);
struct i915_gem_engines *pos, *next;
  
	spin_lock_irq(&ctx->stale.lock);

@@ -480,7 +486,8 @@ static void kill_context(struct i915_gem_context *ctx)
  
		spin_unlock_irq(&ctx->stale.lock);
  
-		kill_engines(pos, ban);

+   kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+i915_gem_context_is_persistent(ctx));
  
		spin_lock_irq(&ctx->stale.lock);

	GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -526,7 +533,8 @@ static void engines_idle_release(struct i915_gem_context 
*ctx,
  
  kill:

if (list_empty(>link)) /* raced, already closed */
-   kill_engines(engines, true);
+   kill_engines(engines, true,
+i915_gem_context_is_persistent(ctx));
  
	i915_sw_fence_commit(&engines->fence);

  }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index 4033184f13b9..9d539f48d7c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ 
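
Condensed, the behavioural change is about which preempt timeout applies when a context gets bumped off the hardware. The helper below is a sketch of the logic the changelog describes, with approximate helper names and the 20ms value taken from the v3 note; it is not the literal diff.

/* Sketch: how much time a preempted request gets before an engine reset. */
static unsigned long preempt_timeout_ms(const struct intel_context *ce,
					unsigned long engine_default_ms)
{
	if (intel_context_is_banned(ce))
		return 1;			/* banned contexts keep the strict 1ms limit */

	if (!intel_context_is_schedulable(ce))
		return 20;			/* exited non-persistent contexts get ~20ms grace */

	return engine_default_ms;		/* everything else follows the engine policy */
}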
