Display controller (DC) performs isochronous memory transfers, and thus,
has a requirement for a minimum memory bandwidth that shall be fulfilled,
otherwise framebuffer data can't be fetched fast enough and this results
in a DC's data-FIFO underflow that follows by a visual corruption.

The Memory Controller drivers provide facility for memory bandwidth
management via interconnect API. This patch wires up the interconnect
API support to the DC driver.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/gpu/drm/tegra/dc.c    | 271 +++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/tegra/dc.h    |   8 +
 drivers/gpu/drm/tegra/drm.c   |  19 +++
 drivers/gpu/drm/tegra/plane.c |   1 +
 drivers/gpu/drm/tegra/plane.h |   4 +-
 5 files changed, 299 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 1a7b08f35776..b540ac6ffdc4 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -519,6 +519,136 @@ static void tegra_dc_setup_window(struct tegra_plane 
*plane,
                tegra_plane_setup_blending(plane, window);
 }
 
+static unsigned long
+tegra_plane_memory_bandwidth(struct drm_plane_state *state,
+                            struct tegra_dc_window *window,
+                            unsigned int num,
+                            unsigned int denum)
+{
+       struct tegra_plane_state *tegra_state;
+       struct drm_crtc_state *crtc_state;
+       const struct drm_format_info *fmt;
+       struct tegra_dc_window win;
+       unsigned long long bandwidth;
+       unsigned int bpp_plane;
+       unsigned int bpp;
+       unsigned int mul;
+       unsigned int i;
+
+       if (!state->fb || !state->visible)
+               return 0;
+
+       crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
+       tegra_state = to_tegra_plane_state(state);
+
+       if (!window)
+               window = &win;
+
+       window->src.w = drm_rect_width(&state->src) >> 16;
+       window->src.h = drm_rect_height(&state->src) >> 16;
+       window->dst.w = drm_rect_width(&state->dst);
+       window->dst.h = drm_rect_height(&state->dst);
+       window->tiling = tegra_state->tiling;
+
+       fmt = state->fb->format;
+
+       /*
+        * Note that real memory bandwidth vary depending on format and
+        * memory layout, we are not taking that into account because small
+        * estimation error isn't important since bandwidth is rounded up
+        * anyway.
+        */
+       for (i = 0, bpp = 0; i < fmt->num_planes; i++) {
+               bpp_plane = fmt->cpp[i] * 8;
+
+               /*
+                * Sub-sampling is relevant for chroma planes only and vertical
+                * readouts are not cached, hence only horizontal sub-sampling
+                * matters.
+                */
+               if (i > 0)
+                       bpp_plane /= fmt->hsub;
+
+               bpp += bpp_plane;
+       }
+
+       /*
+        * Horizontal downscale takes extra bandwidth which roughly depends
+        * on the scaled width.
+        */
+       if (window->src.w > window->dst.w)
+               mul = (window->src.w - window->dst.w) * bpp / 2048 + 1;
+       else
+               mul = 1;
+
+       /*
+        * Ignore cursor window if its width is small enough such that
+        * data-prefetch FIFO will easily help to overcome temporal memory
+        * pressure.
+        *
+        * Window A has a 128bit x 128 deep read FIFO, while windows B/C
+        * have a 128bit x 64 deep read FIFO.
+        *
+        * This allows us to not overestimate memory frequency requirement.
+        * Even if it will happen that cursor gets a temporal underflow, this
+        * won't be fatal.
+        */
+       if (state->plane->type == DRM_PLANE_TYPE_CURSOR &&
+           mul == 1 && window->src.w * bpp <= 128 * 16)
+               return 0;
+
+       /* mode.clock in kHz, bandwidth in kbit/s */
+       bandwidth = kbps_to_icc(crtc_state->mode.clock * bpp * mul);
+
+       /* the requested bandwidth should be higher than required */
+       bandwidth *= num;
+       do_div(bandwidth, denum);
+
+       return min_t(u64, bandwidth, ULONG_MAX);
+}
+
+static unsigned long
+tegra20_plane_memory_bandwidth(struct drm_plane_state *state)
+{
+       return tegra_plane_memory_bandwidth(state, NULL, 29, 10);
+}
+
+static unsigned long
+tegra30_plane_memory_bandwidth(struct drm_plane_state *state)
+{
+       struct tegra_dc_window window;
+       unsigned long bandwidth;
+
+       bandwidth = tegra_plane_memory_bandwidth(state, &window, 29, 10);
+
+       /* x2: memory overfetch for tiled framebuffer and DDR3 */
+       if (window.tiling.mode == TEGRA_BO_TILING_MODE_TILED)
+               bandwidth *= 2;
+
+       return bandwidth;
+}
+
+static unsigned long
+tegra114_plane_memory_bandwidth(struct drm_plane_state *state)
+{
+       struct tegra_dc_window window;
+       unsigned long bandwidth;
+
+       bandwidth = tegra_plane_memory_bandwidth(state, &window, 12, 10);
+
+       /* x2: memory overfetch for tiled framebuffer and DDR3 */
+       if (window.tiling.mode == TEGRA_BO_TILING_MODE_TILED)
+               bandwidth *= 2;
+
+       return bandwidth;
+}
+
+static unsigned long
+tegra124_plane_memory_bandwidth(struct drm_plane_state *state)
+{
+       return tegra_plane_memory_bandwidth(state, NULL, 12, 10);
+}
+
 static const u32 tegra20_primary_formats[] = {
        DRM_FORMAT_ARGB4444,
        DRM_FORMAT_ARGB1555,
@@ -608,8 +738,10 @@ static int tegra_plane_atomic_check(struct drm_plane 
*plane,
        int err;
 
        /* no need for further checks if the plane is being disabled */
-       if (!state->crtc)
+       if (!state->crtc) {
+               plane_state->memory_bandwidth = 0;
                return 0;
+       }
 
        err = tegra_plane_format(state->fb->format->format,
                                 &plane_state->format,
@@ -662,6 +794,8 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
        if (err < 0)
                return err;
 
+       plane_state->memory_bandwidth = dc->soc->plane_memory_bandwidth(state);
+
        return 0;
 }
 
@@ -1186,6 +1320,7 @@ tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
        copy->pclk = state->pclk;
        copy->div = state->div;
        copy->planes = state->planes;
+       copy->memory_bandwidth = state->memory_bandwidth;
 
        return &copy->base;
 }
@@ -1777,6 +1912,8 @@ static void tegra_crtc_atomic_disable(struct drm_crtc 
*crtc,
        err = host1x_client_suspend(&dc->client);
        if (err < 0)
                dev_err(dc->dev, "failed to suspend: %d\n", err);
+
+       icc_set_bw(dc->icc_bandwidth, 0, 0);
 }
 
 static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
@@ -1788,6 +1925,9 @@ static void tegra_crtc_atomic_enable(struct drm_crtc 
*crtc,
        u32 value;
        int err;
 
+       icc_set_bw(dc->icc_bandwidth, state->memory_bandwidth,
+                  state->memory_bandwidth);
+
        err = host1x_client_resume(&dc->client);
        if (err < 0) {
                dev_err(dc->dev, "failed to resume: %d\n", err);
@@ -1901,6 +2041,9 @@ static void tegra_crtc_atomic_enable(struct drm_crtc 
*crtc,
 static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
                                    struct drm_crtc_state *old_crtc_state)
 {
+       struct tegra_dc_state *dc_old_state = to_dc_state(old_crtc_state);
+       struct tegra_dc_state *dc_state = to_dc_state(crtc->state);
+       struct tegra_dc *dc = to_tegra_dc(crtc);
        unsigned long flags;
 
        if (crtc->state->event) {
@@ -1915,6 +2058,25 @@ static void tegra_crtc_atomic_begin(struct drm_crtc 
*crtc,
 
                crtc->state->event = NULL;
        }
+
+       if (old_crtc_state && old_crtc_state->active) {
+               /*
+                * Raise memory bandwidth before changes take effect if it
+                * goes from low to high.
+                */
+               if (dc_old_state->memory_bandwidth < dc_state->memory_bandwidth)
+                       icc_set_bw(dc->icc_bandwidth,
+                                  dc_state->memory_bandwidth,
+                                  dc_state->memory_bandwidth);
+       } else {
+               /*
+                * Raise memory bandwidth before changes take effect if
+                * CRTC is turning on.
+                */
+               icc_set_bw(dc->icc_bandwidth,
+                          dc_state->memory_bandwidth,
+                          dc_state->memory_bandwidth);
+       }
 }
 
 static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
@@ -1933,7 +2095,80 @@ static void tegra_crtc_atomic_flush(struct drm_crtc 
*crtc,
        value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
 }
 
+static bool
+tegra_plane_overlaps_other_plane(struct drm_crtc_state *state,
+                                const struct drm_plane_state *plane_state)
+{
+       const struct drm_plane_state *other_state;
+       struct drm_plane *plane;
+       struct drm_rect rect;
+
+       drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) {
+               rect = plane_state->dst;
+
+               if (other_state == plane_state)
+                       continue;
+
+               if (!other_state->visible || !other_state->fb)
+                       continue;
+
+               if (drm_rect_intersect(&rect, &other_state->dst))
+                       return true;
+       }
+
+       return false;
+}
+
+static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
+                                  struct drm_crtc_state *state)
+{
+       struct tegra_dc_state *dc_state = to_dc_state(state);
+       const struct drm_plane_state *plane_state;
+       const struct tegra_plane_state *tegra;
+       unsigned long long bandwidth = 0;
+       struct drm_plane *plane;
+
+       /*
+        * For overlapping planes pixel's data is fetched for each plane at
+        * the same time, hence bandwidth is accumulated in this case.
+        */
+       drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, state) {
+               tegra = to_tegra_plane_state(plane_state);
+
+               if (tegra_plane_overlaps_other_plane(state, plane_state))
+                       bandwidth += tegra->memory_bandwidth;
+               else
+                       bandwidth = max_t(u64, bandwidth,
+                                         tegra->memory_bandwidth);
+       }
+
+       dc_state->memory_bandwidth = min_t(u64, bandwidth, U32_MAX);
+
+       return 0;
+}
+
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+                                  struct drm_crtc_state *old_crtc_state)
+{
+       struct tegra_dc_state *dc_old_state = to_dc_state(old_crtc_state);
+       struct tegra_dc_state *dc_state = to_dc_state(crtc->state);
+       struct tegra_dc *dc = to_tegra_dc(crtc);
+
+       if (!dc_old_state)
+               return;
+
+       /*
+        * Drop memory bandwidth after changes take effect if it goes from
+        * high to low.
+        */
+       if (dc_old_state->memory_bandwidth > dc_state->memory_bandwidth)
+               icc_set_bw(dc->icc_bandwidth,
+                          dc_state->memory_bandwidth,
+                          dc_state->memory_bandwidth);
+}
+
 static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
+       .atomic_check = tegra_crtc_atomic_check,
        .atomic_begin = tegra_crtc_atomic_begin,
        .atomic_flush = tegra_crtc_atomic_flush,
        .atomic_enable = tegra_crtc_atomic_enable,
@@ -2225,6 +2460,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info 
= {
        .modifiers = tegra20_modifiers,
        .has_win_a_without_filters = true,
        .has_win_c_without_vert_filter = true,
+       .plane_memory_bandwidth = tegra20_plane_memory_bandwidth,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -2244,6 +2480,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info 
= {
        .modifiers = tegra20_modifiers,
        .has_win_a_without_filters = false,
        .has_win_c_without_vert_filter = false,
+       .plane_memory_bandwidth = tegra30_plane_memory_bandwidth,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -2263,6 +2500,7 @@ static const struct tegra_dc_soc_info 
tegra114_dc_soc_info = {
        .modifiers = tegra20_modifiers,
        .has_win_a_without_filters = false,
        .has_win_c_without_vert_filter = false,
+       .plane_memory_bandwidth = tegra114_plane_memory_bandwidth,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -2282,6 +2520,7 @@ static const struct tegra_dc_soc_info 
tegra124_dc_soc_info = {
        .modifiers = tegra124_modifiers,
        .has_win_a_without_filters = false,
        .has_win_c_without_vert_filter = false,
+       .plane_memory_bandwidth = tegra124_plane_memory_bandwidth,
 };
 
 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -2301,6 +2540,7 @@ static const struct tegra_dc_soc_info 
tegra210_dc_soc_info = {
        .modifiers = tegra124_modifiers,
        .has_win_a_without_filters = false,
        .has_win_c_without_vert_filter = false,
+       .plane_memory_bandwidth = tegra124_plane_memory_bandwidth,
 };
 
 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@ -2349,6 +2589,7 @@ static const struct tegra_dc_soc_info 
tegra186_dc_soc_info = {
        .has_nvdisplay = true,
        .wgrps = tegra186_dc_wgrps,
        .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
+       .plane_memory_bandwidth = tegra124_plane_memory_bandwidth,
 };
 
 static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = {
@@ -2397,6 +2638,7 @@ static const struct tegra_dc_soc_info 
tegra194_dc_soc_info = {
        .has_nvdisplay = true,
        .wgrps = tegra194_dc_wgrps,
        .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps),
+       .plane_memory_bandwidth = tegra124_plane_memory_bandwidth,
 };
 
 static const struct of_device_id tegra_dc_of_match[] = {
@@ -2503,6 +2745,7 @@ static int tegra_dc_couple(struct tegra_dc *dc)
 
 static int tegra_dc_probe(struct platform_device *pdev)
 {
+       const char *level = KERN_ERR;
        struct tegra_dc *dc;
        int err;
 
@@ -2571,8 +2814,6 @@ static int tegra_dc_probe(struct platform_device *pdev)
 
        err = tegra_dc_rgb_probe(dc);
        if (err < 0 && err != -ENODEV) {
-               const char *level = KERN_ERR;
-
                if (err == -EPROBE_DEFER)
                        level = KERN_DEBUG;
 
@@ -2581,6 +2822,25 @@ static int tegra_dc_probe(struct platform_device *pdev)
                return err;
        }
 
+       /*
+        * The display controller memory bandwidth management isn't trivial
+        * because it requires the knowledge about the DC hardware state
+        * in order to make a proper decisions. It's not easy to convey
+        * that information to the ICC provider, so we will just use the
+        * first interconnect path for the memory bandwidth management and
+        * make all the decisions within the DC driver, for simplicity.
+        */
+       dc->icc_bandwidth = of_icc_get(dc->dev, "display0a");
+       err = PTR_ERR_OR_ZERO(dc->icc_bandwidth);
+       if (err) {
+               if (err == -EPROBE_DEFER)
+                       level = KERN_DEBUG;
+
+               dev_printk(level, dc->dev,
+                          "failed to get display0a interconnect: %d\n", err);
+               goto remove_rgb;
+       }
+
        platform_set_drvdata(pdev, dc);
        pm_runtime_enable(&pdev->dev);
 
@@ -2599,6 +2859,9 @@ static int tegra_dc_probe(struct platform_device *pdev)
 
 disable_pm:
        pm_runtime_disable(&pdev->dev);
+       icc_put(dc->icc_bandwidth);
+
+remove_rgb:
        tegra_dc_rgb_remove(dc);
 
        return err;
@@ -2616,6 +2879,8 @@ static int tegra_dc_remove(struct platform_device *pdev)
                return err;
        }
 
+       icc_put(dc->icc_bandwidth);
+
        err = tegra_dc_rgb_remove(dc);
        if (err < 0) {
                dev_err(&pdev->dev, "failed to remove RGB output: %d\n", err);
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 3d8ddccd758f..3a0ff57c5169 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -8,6 +8,7 @@
 #define TEGRA_DC_H 1
 
 #include <linux/host1x.h>
+#include <linux/interconnect.h>
 
 #include <drm/drm_crtc.h>
 
@@ -23,6 +24,8 @@ struct tegra_dc_state {
        unsigned int div;
 
        u32 planes;
+
+       unsigned long memory_bandwidth;
 };
 
 static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state)
@@ -66,6 +69,7 @@ struct tegra_dc_soc_info {
        const u64 *modifiers;
        bool has_win_a_without_filters;
        bool has_win_c_without_vert_filter;
+       unsigned long (*plane_memory_bandwidth)(struct drm_plane_state *state);
 };
 
 struct tegra_dc {
@@ -90,6 +94,8 @@ struct tegra_dc {
        struct drm_info_list *debugfs_files;
 
        const struct tegra_dc_soc_info *soc;
+
+       struct icc_path *icc_bandwidth;
 };
 
 static inline struct tegra_dc *
@@ -150,6 +156,8 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc,
                               struct drm_crtc_state *crtc_state,
                               struct clk *clk, unsigned long pclk,
                               unsigned int div);
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+                                  struct drm_crtc_state *old_crtc_state);
 
 /* from rgb.c */
 int tegra_dc_rgb_probe(struct tegra_dc *dc);
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bd268028fb3d..44f558adddff 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -20,6 +20,7 @@
 #include <drm/drm_prime.h>
 #include <drm/drm_vblank.h>
 
+#include "dc.h"
 #include "drm.h"
 #include "gem.h"
 
@@ -59,6 +60,22 @@ static const struct drm_mode_config_funcs 
tegra_drm_mode_config_funcs = {
        .atomic_commit = drm_atomic_helper_commit,
 };
 
+static void tegra_atomic_post_commit(struct drm_device *drm,
+                                    struct drm_atomic_state *old_state)
+{
+       struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+       struct drm_crtc *crtc;
+       unsigned int i;
+
+       for_each_oldnew_crtc_in_state(old_state, crtc, old_crtc_state,
+                                     new_crtc_state, i) {
+               if (!new_crtc_state->active)
+                       continue;
+
+               tegra_crtc_atomic_post_commit(crtc, old_crtc_state);
+       }
+}
+
 static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state)
 {
        struct drm_device *drm = old_state->dev;
@@ -75,6 +92,8 @@ static void tegra_atomic_commit_tail(struct drm_atomic_state 
*old_state)
        } else {
                drm_atomic_helper_commit_tail_rpm(old_state);
        }
+
+       tegra_atomic_post_commit(drm, old_state);
 }
 
 static const struct drm_mode_config_helper_funcs
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 9ccfb56e9b01..f94925744105 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -63,6 +63,7 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
        copy->swap = state->swap;
        copy->bottom_up = state->bottom_up;
        copy->opaque = state->opaque;
+       copy->memory_bandwidth = state->memory_bandwidth;
 
        for (i = 0; i < 2; i++)
                copy->blending[i] = state->blending[i];
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index a158a915109a..5227c7a9ad8b 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -51,10 +51,12 @@ struct tegra_plane_state {
        /* used for legacy blending support only */
        struct tegra_plane_legacy_blending_state blending[2];
        bool opaque;
+
+       unsigned long memory_bandwidth;
 };
 
 static inline struct tegra_plane_state *
-to_tegra_plane_state(struct drm_plane_state *state)
+to_tegra_plane_state(const struct drm_plane_state *state)
 {
        if (state)
                return container_of(state, struct tegra_plane_state, base);
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to