[Intel-gfx] [GIT PULL] GVT next changes for drm-intel-next

2022-03-07 Thread Wang, Zhi A
Hi folks:

Here is a new pull request of gvt-next. It contains a small patch to add the 
missing
mdev attribute name, which will be used by the middleware, like kubevirt.

This pull has been tested by:

$ dim apply-pull drm-intel-next < this_email.eml

The following changes since commit 30424ebae8df0f786835e7a31ad790fa00764f35:

  Merge tag 'drm-intel-gt-next-2022-02-17' of 
git://anongit.freedesktop.org/drm/drm-intel into drm-intel-next (2022-02-23 
15:03:51 -0500)

are available in the Git repository at:

  https://github.com/intel/gvt-linux tags/gvt-next-2022-03-07

for you to fetch changes up to 43d26c4fc6c446d766253d546f0083d78023d34a:

  drm/i915/gvt: add the missing mdev attribute "name" (2022-03-07 12:21:58 
-0500)


- add the missing attribute "name" in VFIO mdev hierarchy.


Zhi Wang (1):
  drm/i915/gvt: add the missing mdev attribute "name"

 drivers/gpu/drm/i915/gvt/kvmgt.c | 15 +++
 1 file changed, 15 insertions(+)



[Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Clean up some dpll stuff (rev3)

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/i915: Clean up some dpll stuff (rev3)
URL   : https://patchwork.freedesktop.org/series/100899/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




[Intel-gfx] [PATCH v2 4/8] drm/i915: Store the m2 divider as a whole in bxt_clk_div

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

Get rid of the pointless m2 int vs. frac split in bxt_clk_div
and just store the whole divider as one.

v2: Document the full divider as a proper decimal number

Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 2a88c6fa1f34..ae3c07cc2eaa 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2088,8 +2088,7 @@ struct bxt_clk_div {
int clock;
u32 p1;
u32 p2;
-   u32 m2_int;
-   u32 m2_frac;
+   u32 m2;
u32 n;
 
int vco;
@@ -2097,13 +2096,14 @@ struct bxt_clk_div {
 
 /* pre-calculated values for DP linkrates */
 static const struct bxt_clk_div bxt_dp_clk_val[] = {
-   { .clock = 162000, .p1 = 4, .p2 = 2, .m2_int = 32, .m2_frac = 1677722, .n = 1, },
-   { .clock = 270000, .p1 = 4, .p2 = 1, .m2_int = 27, .m2_frac =   0, .n = 1, },
-   { .clock = 540000, .p1 = 2, .p2 = 1, .m2_int = 27, .m2_frac =   0, .n = 1, },
-   { .clock = 216000, .p1 = 3, .p2 = 2, .m2_int = 32, .m2_frac = 1677722, .n = 1, },
-   { .clock = 243000, .p1 = 4, .p2 = 1, .m2_int = 24, .m2_frac = 1258291, .n = 1, },
-   { .clock = 324000, .p1 = 4, .p2 = 1, .m2_int = 32, .m2_frac = 1677722, .n = 1, },
-   { .clock = 432000, .p1 = 3, .p2 = 1, .m2_int = 32, .m2_frac = 1677722, .n = 1, },
+   /* m2 is .22 binary fixed point */
+   { .clock = 162000, .p1 = 4, .p2 = 2, .n = 1, .m2 = 0x819999a /* 32.4 */ },
+   { .clock = 270000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x6c00000 /* 27.0 */ },
+   { .clock = 540000, .p1 = 2, .p2 = 1, .n = 1, .m2 = 0x6c00000 /* 27.0 */ },
+   { .clock = 216000, .p1 = 3, .p2 = 2, .n = 1, .m2 = 0x819999a /* 32.4 */ },
+   { .clock = 243000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x6133333 /* 24.3 */ },
+   { .clock = 324000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x819999a /* 32.4 */ },
+   { .clock = 432000, .p1 = 3, .p2 = 1, .n = 1, .m2 = 0x819999a /* 32.4 */ },
 };
 
 static bool
@@ -2130,8 +2130,7 @@ bxt_ddi_hdmi_pll_dividers(struct intel_crtc_state 
*crtc_state,
clk_div->p2 = best_clock.p2;
	drm_WARN_ON(&i915->drm, best_clock.m1 != 2);
clk_div->n = best_clock.n;
-   clk_div->m2_int = best_clock.m2 >> 22;
-   clk_div->m2_frac = best_clock.m2 & ((1 << 22) - 1);
+   clk_div->m2 = best_clock.m2;
 
clk_div->vco = best_clock.vco;
 
@@ -2200,11 +2199,11 @@ static bool bxt_ddi_set_dpll_hw_state(struct 
intel_crtc_state *crtc_state,
lanestagger = 0x02;
 
dpll_hw_state->ebb0 = PORT_PLL_P1(clk_div->p1) | 
PORT_PLL_P2(clk_div->p2);
-   dpll_hw_state->pll0 = PORT_PLL_M2_INT(clk_div->m2_int);
+   dpll_hw_state->pll0 = PORT_PLL_M2_INT(clk_div->m2 >> 22);
dpll_hw_state->pll1 = PORT_PLL_N(clk_div->n);
-   dpll_hw_state->pll2 = PORT_PLL_M2_FRAC(clk_div->m2_frac);
-   dpll_hw_state->pll2 = PORT_PLL_M2_FRAC(clk_div->m2 & 0x3fffff);
 
-   if (clk_div->m2_frac)
-   if (clk_div->m2 & 0x3fffff)
dpll_hw_state->pll3 = PORT_PLL_M2_FRAC_ENABLE;
 
dpll_hw_state->pll6 = PORT_PLL_PROP_COEFF(prop_coef) |
-- 
2.34.1



[Intel-gfx] [PATCH v2 8/8] drm/i915: Remove struct dp_link_dpll

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

struct dp_link_dpll is a pointless wrapper around struct dpll.
Just store the desired link rate into struct dpll::dot and
we're done.

v2: Document the full divider as a proper decimal number on chv
Nuke bogus eDP 1.4 comments for chv while at it

Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/g4x_dp.c | 55 +--
 1 file changed, 17 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c 
b/drivers/gpu/drm/i915/display/g4x_dp.c
index 22345051e667..8e1338678d91 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -24,58 +24,37 @@
 #include "intel_pps.h"
 #include "vlv_sideband.h"
 
-struct dp_link_dpll {
-   int clock;
-   struct dpll dpll;
+static const struct dpll g4x_dpll[] = {
+   { .dot = 162000, .p1 = 2, .p2 = 10, .n = 2, .m1 = 23, .m2 = 8, },
+   { .dot = 270000, .p1 = 1, .p2 = 10, .n = 1, .m1 = 14, .m2 = 2, },
 };
 
-static const struct dp_link_dpll g4x_dpll[] = {
-   { 162000,
-   { .p1 = 2, .p2 = 10, .n = 2, .m1 = 23, .m2 = 8 } },
-   { 270000,
-   { .p1 = 1, .p2 = 10, .n = 1, .m1 = 14, .m2 = 2 } }
+static const struct dpll pch_dpll[] = {
+   { .dot = 162000, .p1 = 2, .p2 = 10, .n = 1, .m1 = 12, .m2 = 9, },
+   { .dot = 270000, .p1 = 1, .p2 = 10, .n = 2, .m1 = 14, .m2 = 8, },
 };
 
-static const struct dp_link_dpll pch_dpll[] = {
-   { 162000,
-   { .p1 = 2, .p2 = 10, .n = 1, .m1 = 12, .m2 = 9 } },
-   { 270000,
-   { .p1 = 1, .p2 = 10, .n = 2, .m1 = 14, .m2 = 8 } }
+static const struct dpll vlv_dpll[] = {
+   { .dot = 162000, .p1 = 3, .p2 = 2, .n = 5, .m1 = 3, .m2 = 81, },
+   { .dot = 270000, .p1 = 2, .p2 = 2, .n = 1, .m1 = 2, .m2 = 27, },
 };
 
-static const struct dp_link_dpll vlv_dpll[] = {
-   { 162000,
-   { .p1 = 3, .p2 = 2, .n = 5, .m1 = 3, .m2 = 81 } },
-   { 270000,
-   { .p1 = 2, .p2 = 2, .n = 1, .m1 = 2, .m2 = 27 } }
-};
-
-/*
- * CHV supports eDP 1.4 that have  more link rates.
- * Below only provides the fixed rate but exclude variable rate.
- */
-static const struct dp_link_dpll chv_dpll[] = {
-   /*
-* CHV requires to program fractional division for m2.
-* m2 is stored in fixed point format using formula below
-* (m2_int << 22) | m2_fraction
-*/
-   { 162000,   /* m2_int = 32, m2_fraction = 1677722 */
-   { .p1 = 4, .p2 = 2, .n = 1, .m1 = 2, .m2 = 0x819999a } },
-   { 270000,   /* m2_int = 27, m2_fraction = 0 */
-   { .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } },
+static const struct dpll chv_dpll[] = {
+   /* m2 is .22 binary fixed point  */
+   { .dot = 162000, .p1 = 4, .p2 = 2, .n = 1, .m1 = 2, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 270000, .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 /* 27.0 */ },
 };
 
 const struct dpll *vlv_get_dpll(struct drm_i915_private *i915)
 {
-   return IS_CHERRYVIEW(i915) ? &chv_dpll[0].dpll : &vlv_dpll[0].dpll;
+   return IS_CHERRYVIEW(i915) ? &chv_dpll[0] : &vlv_dpll[0];
 }
 
 void g4x_dp_set_clock(struct intel_encoder *encoder,
  struct intel_crtc_state *pipe_config)
 {
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-   const struct dp_link_dpll *divisor = NULL;
+   const struct dpll *divisor = NULL;
int i, count = 0;
 
if (IS_G4X(dev_priv)) {
@@ -94,8 +73,8 @@ void g4x_dp_set_clock(struct intel_encoder *encoder,
 
if (divisor && count) {
for (i = 0; i < count; i++) {
-   if (pipe_config->port_clock == divisor[i].clock) {
-   pipe_config->dpll = divisor[i].dpll;
+   if (pipe_config->port_clock == divisor[i].dot) {
+   pipe_config->dpll = divisor[i];
pipe_config->clock_set = true;
break;
}
-- 
2.34.1



[Intel-gfx] [PATCH v2 7/8] drm/i915: Populate bxt/glk DPLL clock limits a bit more

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

Set the bxt/glk DPLL min dotclock to 25MHz (HDMI minimum)
and the max to 594 MHz (HDMI max). The supported DP frequencies
(162MHz-540MHz) fit within the same range.

Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/intel_dpll.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c 
b/drivers/gpu/drm/i915/display/intel_dpll.c
index f4e5290b86a4..7377f1f009ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -285,8 +285,7 @@ static const struct intel_limit intel_limits_chv = {
 };
 
 static const struct intel_limit intel_limits_bxt = {
-   /* FIXME: find real dot limits */
-   .dot = { .min = 0, .max = INT_MAX },
+   .dot = { .min = 25000, .max = 594000 },
.vco = { .min = 480, .max = 670 },
.n = { .min = 1, .max = 1 },
.m1 = { .min = 2, .max = 2 },
-- 
2.34.1



[Intel-gfx] [PATCH v2 6/8] drm/i915: Replace hand rolled bxt vco calculation with chv_calc_dpll_params()

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

Use chv_calc_dpll_params() to calculate the BXT DP DPLL VCO
frequency.

We need to add the m1 divider into bxt_dp_clk_val[] for this to work.

v2: Make the WARN_ON() sensible

Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 23 +++
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 4b52086aa9e9..b7071da4b7e5 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2086,13 +2086,13 @@ static bool bxt_ddi_pll_get_hw_state(struct 
drm_i915_private *dev_priv,
 /* pre-calculated values for DP linkrates */
 static const struct dpll bxt_dp_clk_val[] = {
/* m2 is .22 binary fixed point */
-   { .dot = 162000, .p1 = 4, .p2 = 2, .n = 1, .m2 = 0x819999a /* 32.4 */ },
-   { .dot = 270000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x6c00000 /* 27.0 */ },
-   { .dot = 540000, .p1 = 2, .p2 = 1, .n = 1, .m2 = 0x6c00000 /* 27.0 */ },
-   { .dot = 216000, .p1 = 3, .p2 = 2, .n = 1, .m2 = 0x819999a /* 32.4 */ },
-   { .dot = 243000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x6133333 /* 24.3 */ },
-   { .dot = 324000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x819999a /* 32.4 */ },
-   { .dot = 432000, .p1 = 3, .p2 = 1, .n = 1, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 162000, .p1 = 4, .p2 = 2, .n = 1, .m1 = 2, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 270000, .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 /* 27.0 */ },
+   { .dot = 540000, .p1 = 2, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 /* 27.0 */ },
+   { .dot = 216000, .p1 = 3, .p2 = 2, .n = 1, .m1 = 2, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 243000, .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6133333 /* 24.3 */ },
+   { .dot = 324000, .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 432000, .p1 = 3, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x819999a /* 32.4 */ },
 };
 
 static bool
@@ -2122,18 +2122,21 @@ bxt_ddi_hdmi_pll_dividers(struct intel_crtc_state 
*crtc_state,
 static void bxt_ddi_dp_pll_dividers(struct intel_crtc_state *crtc_state,
struct dpll *clk_div)
 {
-   int clock = crtc_state->port_clock;
+   struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
int i;
 
*clk_div = bxt_dp_clk_val[0];
for (i = 0; i < ARRAY_SIZE(bxt_dp_clk_val); ++i) {
-   if (bxt_dp_clk_val[i].dot == clock) {
+   if (crtc_state->port_clock == bxt_dp_clk_val[i].dot) {
*clk_div = bxt_dp_clk_val[i];
break;
}
}
 
-   clk_div->vco = clock * 10 / 2 * clk_div->p1 * clk_div->p2;
+   chv_calc_dpll_params(i915->dpll.ref_clks.nssc, clk_div);
+
+   drm_WARN_ON(&i915->drm, clk_div->vco == 0 ||
+   clk_div->dot != crtc_state->port_clock);
 }
 
 static bool bxt_ddi_set_dpll_hw_state(struct intel_crtc_state *crtc_state,
-- 
2.34.1



[Intel-gfx] [PATCH v2 5/8] drm/i915: Replace bxt_clk_div with struct dpll

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

bxt_clk_div is basically the same as struct dpll. Just use the latter.

Reviewed-by: Jani Nikula 
Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 50 ++-
 1 file changed, 16 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index ae3c07cc2eaa..4b52086aa9e9 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2083,69 +2083,51 @@ static bool bxt_ddi_pll_get_hw_state(struct 
drm_i915_private *dev_priv,
return ret;
 }
 
-/* bxt clock parameters */
-struct bxt_clk_div {
-   int clock;
-   u32 p1;
-   u32 p2;
-   u32 m2;
-   u32 n;
-
-   int vco;
-};
-
 /* pre-calculated values for DP linkrates */
-static const struct bxt_clk_div bxt_dp_clk_val[] = {
+static const struct dpll bxt_dp_clk_val[] = {
/* m2 is .22 binary fixed point */
-   { .clock = 162000, .p1 = 4, .p2 = 2, .n = 1, .m2 = 0x819999a /* 32.4 */ },
-   { .clock = 270000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x6c00000 /* 27.0 */ },
-   { .clock = 540000, .p1 = 2, .p2 = 1, .n = 1, .m2 = 0x6c00000 /* 27.0 */ },
-   { .clock = 216000, .p1 = 3, .p2 = 2, .n = 1, .m2 = 0x819999a /* 32.4 */ },
-   { .clock = 243000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x6133333 /* 24.3 */ },
-   { .clock = 324000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x819999a /* 32.4 */ },
-   { .clock = 432000, .p1 = 3, .p2 = 1, .n = 1, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 162000, .p1 = 4, .p2 = 2, .n = 1, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 270000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x6c00000 /* 27.0 */ },
+   { .dot = 540000, .p1 = 2, .p2 = 1, .n = 1, .m2 = 0x6c00000 /* 27.0 */ },
+   { .dot = 216000, .p1 = 3, .p2 = 2, .n = 1, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 243000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x6133333 /* 24.3 */ },
+   { .dot = 324000, .p1 = 4, .p2 = 1, .n = 1, .m2 = 0x819999a /* 32.4 */ },
+   { .dot = 432000, .p1 = 3, .p2 = 1, .n = 1, .m2 = 0x819999a /* 32.4 */ },
 };
 
 static bool
 bxt_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state,
- struct bxt_clk_div *clk_div)
+ struct dpll *clk_div)
 {
struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-   struct dpll best_clock;
 
/* Calculate HDMI div */
/*
 * FIXME: tie the following calculation into
 * i9xx_crtc_compute_clock
 */
-   if (!bxt_find_best_dpll(crtc_state, &best_clock)) {
+   if (!bxt_find_best_dpll(crtc_state, clk_div)) {
drm_dbg(>drm, "no PLL dividers found for clock %d pipe 
%c\n",
crtc_state->port_clock,
pipe_name(crtc->pipe));
return false;
}
 
-   clk_div->p1 = best_clock.p1;
-   clk_div->p2 = best_clock.p2;
-   drm_WARN_ON(&i915->drm, best_clock.m1 != 2);
-   clk_div->n = best_clock.n;
-   clk_div->m2 = best_clock.m2;
-
-   clk_div->vco = best_clock.vco;
+   drm_WARN_ON(&i915->drm, clk_div->m1 != 2);
 
return true;
 }
 
 static void bxt_ddi_dp_pll_dividers(struct intel_crtc_state *crtc_state,
-   struct bxt_clk_div *clk_div)
+   struct dpll *clk_div)
 {
int clock = crtc_state->port_clock;
int i;
 
*clk_div = bxt_dp_clk_val[0];
for (i = 0; i < ARRAY_SIZE(bxt_dp_clk_val); ++i) {
-   if (bxt_dp_clk_val[i].clock == clock) {
+   if (bxt_dp_clk_val[i].dot == clock) {
*clk_div = bxt_dp_clk_val[i];
break;
}
@@ -2155,7 +2137,7 @@ static void bxt_ddi_dp_pll_dividers(struct 
intel_crtc_state *crtc_state,
 }
 
 static bool bxt_ddi_set_dpll_hw_state(struct intel_crtc_state *crtc_state,
- const struct bxt_clk_div *clk_div)
+ const struct dpll *clk_div)
 {
struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
	struct intel_dpll_hw_state *dpll_hw_state = &crtc_state->dpll_hw_state;
@@ -2227,7 +2209,7 @@ static bool bxt_ddi_set_dpll_hw_state(struct 
intel_crtc_state *crtc_state,
 static bool
 bxt_ddi_dp_set_dpll_hw_state(struct intel_crtc_state *crtc_state)
 {
-   struct bxt_clk_div clk_div = {};
+   struct dpll clk_div = {};
 
	bxt_ddi_dp_pll_dividers(crtc_state, &clk_div);
 
@@ -2237,7 +2219,7 @@ bxt_ddi_dp_set_dpll_hw_state(struct intel_crtc_state 
*crtc_state)
 static bool
 bxt_ddi_hdmi_set_dpll_hw_state(struct intel_crtc_state *crtc_state)
 {
-   struct bxt_clk_div clk_div = {};
+   struct dpll clk_div = {};
 
	bxt_ddi_hdmi_pll_dividers(crtc_state, &clk_div);
 
-- 
2.34.1



[Intel-gfx] [PATCH v2 2/8] drm/i915: Remove redundant/wrong comments

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

Remove the comment specifying the exact formula for calculating
the DPLL frequency from the *_find_best_dpll() functions. Each
platform variant has its own way to calculate these and we have
the code already to do that. These comments are entirely redundant
and often even wrong so just get rid of them.

Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/intel_dpll.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c 
b/drivers/gpu/drm/i915/display/intel_dpll.c
index b3fd94538c44..f4e5290b86a4 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -425,8 +425,7 @@ i9xx_select_p2_div(const struct intel_limit *limit,
 
 /*
  * Returns a set of divisors for the desired target clock with the given
- * refclk, or FALSE.  The returned values represent the clock equation:
- * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
+ * refclk, or FALSE.
  *
  * Target and reference clocks are specified in kHz.
  *
@@ -484,8 +483,7 @@ i9xx_find_best_dpll(const struct intel_limit *limit,
 
 /*
  * Returns a set of divisors for the desired target clock with the given
- * refclk, or FALSE.  The returned values represent the clock equation:
- * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
+ * refclk, or FALSE.
  *
  * Target and reference clocks are specified in kHz.
  *
@@ -541,8 +539,7 @@ pnv_find_best_dpll(const struct intel_limit *limit,
 
 /*
  * Returns a set of divisors for the desired target clock with the given
- * refclk, or FALSE.  The returned values represent the clock equation:
- * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
+ * refclk, or FALSE.
  *
  * Target and reference clocks are specified in kHz.
  *
@@ -641,8 +638,7 @@ static bool vlv_PLL_is_optimal(struct drm_device *dev, int 
target_freq,
 
 /*
  * Returns a set of divisors for the desired target clock with the given
- * refclk, or FALSE.  The returned values represent the clock equation:
- * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
+ * refclk, or FALSE.
  */
 static bool
 vlv_find_best_dpll(const struct intel_limit *limit,
@@ -700,8 +696,7 @@ vlv_find_best_dpll(const struct intel_limit *limit,
 
 /*
  * Returns a set of divisors for the desired target clock with the given
- * refclk, or FALSE.  The returned values represent the clock equation:
- * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
+ * refclk, or FALSE.
  */
 static bool
 chv_find_best_dpll(const struct intel_limit *limit,
-- 
2.34.1



[Intel-gfx] [PATCH v2 3/8] drm/i915: Clean up bxt/glk PLL registers

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

Use REG_BIT() & co. for bxt/glk PLL registers.

Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 32 +-
 drivers/gpu/drm/i915/gvt/handlers.c   | 15 +++--
 drivers/gpu/drm/i915/i915_reg.h   | 61 ++-
 3 files changed, 57 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 4595795d694f..2a88c6fa1f34 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -1898,7 +1898,7 @@ static void bxt_ddi_pll_enable(struct drm_i915_private 
*dev_priv,
 
/* Write M2 integer */
temp = intel_de_read(dev_priv, BXT_PORT_PLL(phy, ch, 0));
-   temp &= ~PORT_PLL_M2_MASK;
+   temp &= ~PORT_PLL_M2_INT_MASK;
temp |= pll->state.hw_state.pll0;
intel_de_write(dev_priv, BXT_PORT_PLL(phy, ch, 0), temp);
 
@@ -2034,7 +2034,7 @@ static bool bxt_ddi_pll_get_hw_state(struct 
drm_i915_private *dev_priv,
hw_state->ebb4 &= PORT_PLL_10BIT_CLK_ENABLE;
 
hw_state->pll0 = intel_de_read(dev_priv, BXT_PORT_PLL(phy, ch, 0));
-   hw_state->pll0 &= PORT_PLL_M2_MASK;
+   hw_state->pll0 &= PORT_PLL_M2_INT_MASK;
 
hw_state->pll1 = intel_de_read(dev_priv, BXT_PORT_PLL(phy, ch, 1));
hw_state->pll1 &= PORT_PLL_N_MASK;
@@ -2200,23 +2200,23 @@ static bool bxt_ddi_set_dpll_hw_state(struct 
intel_crtc_state *crtc_state,
lanestagger = 0x02;
 
dpll_hw_state->ebb0 = PORT_PLL_P1(clk_div->p1) | 
PORT_PLL_P2(clk_div->p2);
-   dpll_hw_state->pll0 = clk_div->m2_int;
+   dpll_hw_state->pll0 = PORT_PLL_M2_INT(clk_div->m2_int);
dpll_hw_state->pll1 = PORT_PLL_N(clk_div->n);
-   dpll_hw_state->pll2 = clk_div->m2_frac;
+   dpll_hw_state->pll2 = PORT_PLL_M2_FRAC(clk_div->m2_frac);
 
if (clk_div->m2_frac)
dpll_hw_state->pll3 = PORT_PLL_M2_FRAC_ENABLE;
 
-   dpll_hw_state->pll6 = prop_coef | PORT_PLL_INT_COEFF(int_coef);
-   dpll_hw_state->pll6 |= PORT_PLL_GAIN_CTL(gain_ctl);
+   dpll_hw_state->pll6 = PORT_PLL_PROP_COEFF(prop_coef) |
+   PORT_PLL_INT_COEFF(int_coef) |
+   PORT_PLL_GAIN_CTL(gain_ctl);
 
-   dpll_hw_state->pll8 = targ_cnt;
+   dpll_hw_state->pll8 = PORT_PLL_TARGET_CNT(targ_cnt);
 
-   dpll_hw_state->pll9 = 5 << PORT_PLL_LOCK_THRESHOLD_SHIFT;
+   dpll_hw_state->pll9 = PORT_PLL_LOCK_THRESHOLD(5);
 
-   dpll_hw_state->pll10 =
-   PORT_PLL_DCO_AMP(PORT_PLL_DCO_AMP_DEFAULT)
-   | PORT_PLL_DCO_AMP_OVR_EN_H;
+   dpll_hw_state->pll10 = PORT_PLL_DCO_AMP(15) |
+   PORT_PLL_DCO_AMP_OVR_EN_H;
 
dpll_hw_state->ebb4 = PORT_PLL_10BIT_CLK_ENABLE;
 
@@ -2252,12 +2252,12 @@ static int bxt_ddi_pll_get_freq(struct drm_i915_private 
*i915,
struct dpll clock;
 
clock.m1 = 2;
-   clock.m2 = (pll_state->pll0 & PORT_PLL_M2_MASK) << 22;
+   clock.m2 = REG_FIELD_GET(PORT_PLL_M2_INT_MASK, pll_state->pll0) << 22;
if (pll_state->pll3 & PORT_PLL_M2_FRAC_ENABLE)
-   clock.m2 |= pll_state->pll2 & PORT_PLL_M2_FRAC_MASK;
-   clock.n = (pll_state->pll1 & PORT_PLL_N_MASK) >> PORT_PLL_N_SHIFT;
-   clock.p1 = (pll_state->ebb0 & PORT_PLL_P1_MASK) >> PORT_PLL_P1_SHIFT;
-   clock.p2 = (pll_state->ebb0 & PORT_PLL_P2_MASK) >> PORT_PLL_P2_SHIFT;
+   clock.m2 |= REG_FIELD_GET(PORT_PLL_M2_FRAC_MASK, 
pll_state->pll2);
+   clock.n = REG_FIELD_GET(PORT_PLL_N_MASK, pll_state->pll1);
+   clock.p1 = REG_FIELD_GET(PORT_PLL_P1_MASK, pll_state->ebb0);
+   clock.p2 = REG_FIELD_GET(PORT_PLL_P2_MASK, pll_state->ebb0);
 
	return chv_calc_dpll_params(i915->dpll.ref_clks.nssc, &clock);
 }
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c 
b/drivers/gpu/drm/i915/gvt/handlers.c
index efdd2f3f9d73..0ee3ecc83234 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -576,12 +576,17 @@ static u32 bxt_vgpu_get_dp_bitrate(struct intel_vgpu 
*vgpu, enum port port)
}
 
clock.m1 = 2;
-   clock.m2 = (vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 0)) & 
PORT_PLL_M2_MASK) << 22;
+   clock.m2 = REG_FIELD_GET(PORT_PLL_M2_INT_MASK,
+vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 0))) 
<< 22;
if (vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 3)) & 
PORT_PLL_M2_FRAC_ENABLE)
-   clock.m2 |= vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 2)) & 
PORT_PLL_M2_FRAC_MASK;
-   clock.n = (vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 1)) & 
PORT_PLL_N_MASK) >> PORT_PLL_N_SHIFT;
-   clock.p1 = (vgpu_vreg_t(vgpu, BXT_PORT_PLL_EBB_0(phy, ch)) & 
PORT_PLL_P1_MASK) >> PORT_PLL_P1_SHIFT;
-   clock.p2 = (vgpu_vreg_t(vgpu, BXT_PORT_PLL_EBB_0(phy, ch)) & 
PORT_PLL_P2_MASK) >> PORT_PLL_P2_SHIFT;
+   clock.m2 |= REG_FIELD_GET(PORT_PLL_M2_FRAC_MASK,
+

[Intel-gfx] [PATCH v2 1/8] drm/i915: Store the /5 target clock in struct dpll on vlv/chv

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

Unify vlv/chv with earlier platforms so that the struct dpll::dot
represents the /5 clock frequency (ie. DP symbol rate or HDMI
TMDS rate) rather than the *5 fast clock (/2 of the bitrate).
Makes life a little less confusing to get the same number back
in .dot which we fed into the DPLL algorithm.

v2: Actually just include the 5x in the final P divider
Do the same change to the hand rolled gvt code

Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/display/intel_dpll.c | 19 ---
 drivers/gpu/drm/i915/gvt/handlers.c   |  4 ++--
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c 
b/drivers/gpu/drm/i915/display/intel_dpll.c
index 0ae37fdbf2a5..b3fd94538c44 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -254,12 +254,12 @@ static const struct intel_limit ilk_limits_dual_lvds_100m 
= {
 
 static const struct intel_limit intel_limits_vlv = {
 /*
- * These are the data rate limits (measured in fast clocks)
+ * These are based on the data rate limits (measured in fast clocks)
  * since those are the strictest limits we have. The fast
  * clock and actual rate limits are more relaxed, so checking
  * them would make no difference.
  */
-   .dot = { .min = 25000 * 5, .max = 27 * 5 },
+   .dot = { .min = 25000, .max = 27 },
.vco = { .min = 400, .max = 600 },
.n = { .min = 1, .max = 7 },
.m1 = { .min = 2, .max = 3 },
@@ -270,12 +270,12 @@ static const struct intel_limit intel_limits_vlv = {
 
 static const struct intel_limit intel_limits_chv = {
/*
-* These are the data rate limits (measured in fast clocks)
+* These are based on the data rate limits (measured in fast clocks)
 * since those are the strictest limits we have.  The fast
 * clock and actual rate limits are more relaxed, so checking
 * them would make no difference.
 */
-   .dot = { .min = 25000 * 5, .max = 54 * 5},
+   .dot = { .min = 25000, .max = 54 },
.vco = { .min = 480, .max = 648 },
.n = { .min = 1, .max = 1 },
.m1 = { .min = 2, .max = 2 },
@@ -337,26 +337,26 @@ int i9xx_calc_dpll_params(int refclk, struct dpll *clock)
 int vlv_calc_dpll_params(int refclk, struct dpll *clock)
 {
clock->m = clock->m1 * clock->m2;
-   clock->p = clock->p1 * clock->p2;
+   clock->p = clock->p1 * clock->p2 * 5;
if (WARN_ON(clock->n == 0 || clock->p == 0))
return 0;
clock->vco = DIV_ROUND_CLOSEST(refclk * clock->m, clock->n);
clock->dot = DIV_ROUND_CLOSEST(clock->vco, clock->p);
 
-   return clock->dot / 5;
+   return clock->dot;
 }
 
 int chv_calc_dpll_params(int refclk, struct dpll *clock)
 {
clock->m = clock->m1 * clock->m2;
-   clock->p = clock->p1 * clock->p2;
+   clock->p = clock->p1 * clock->p2 * 5;
if (WARN_ON(clock->n == 0 || clock->p == 0))
return 0;
clock->vco = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(refclk, clock->m),
   clock->n << 22);
clock->dot = DIV_ROUND_CLOSEST(clock->vco, clock->p);
 
-   return clock->dot / 5;
+   return clock->dot;
 }
 
 /*
@@ -659,8 +659,6 @@ vlv_find_best_dpll(const struct intel_limit *limit,
int max_n = min(limit->n.max, refclk / 19200);
bool found = false;
 
-   target *= 5; /* fast clock */
-
memset(best_clock, 0, sizeof(*best_clock));
 
/* based on hardware requirement, prefer smaller n to precision */
@@ -729,7 +727,6 @@ chv_find_best_dpll(const struct intel_limit *limit,
 */
clock.n = 1;
clock.m1 = 2;
-   target *= 5;/* fast clock */
 
for (clock.p1 = limit->p1.max; clock.p1 >= limit->p1.min; clock.p1--) {
for (clock.p2 = limit->p2.p2_fast;
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c 
b/drivers/gpu/drm/i915/gvt/handlers.c
index 520a7e1942f3..efdd2f3f9d73 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -583,7 +583,7 @@ static u32 bxt_vgpu_get_dp_bitrate(struct intel_vgpu *vgpu, 
enum port port)
clock.p1 = (vgpu_vreg_t(vgpu, BXT_PORT_PLL_EBB_0(phy, ch)) & 
PORT_PLL_P1_MASK) >> PORT_PLL_P1_SHIFT;
clock.p2 = (vgpu_vreg_t(vgpu, BXT_PORT_PLL_EBB_0(phy, ch)) & 
PORT_PLL_P2_MASK) >> PORT_PLL_P2_SHIFT;
clock.m = clock.m1 * clock.m2;
-   clock.p = clock.p1 * clock.p2;
+   clock.p = clock.p1 * clock.p2 * 5;
 
if (clock.n == 0 || clock.p == 0) {
gvt_dbg_dpy("vgpu-%d PORT_%c PLL has invalid divider\n", 
vgpu->id, port_name(port));
@@ -593,7 +593,7 @@ static u32 bxt_vgpu_get_dp_bitrate(struct intel_vgpu *vgpu, 
enum port port)
clock.vco = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(refclk, clock.m), clock.n 
<< 22);
  

[Intel-gfx] [PATCH v2 0/8] drm/i915: Clean up some dpll stuff

2022-03-07 Thread Ville Syrjala
From: Ville Syrjälä 

Clean up a bunch of struct dpll usage, and a few other
random things around the same area.

v2: Clean up the BXT PLL registers and pimp a bunch of comments

Ville Syrjälä (8):
  drm/i915: Store the /5 target clock in struct dpll on vlv/chv
  drm/i915: Remove redundant/wrong comments
  drm/i915: Clean up bxt/glk PLL registers
  drm/i915: Store the m2 divider as a whole in bxt_clk_div
  drm/i915: Replace bxt_clk_div with struct dpll
  drm/i915: Replace hand rolled bxt vco calculation with
chv_calc_dpll_params()
  drm/i915: Populate bxt/glk DPLL clock limits a bit more
  drm/i915: Remove struct dp_link_dpll

 drivers/gpu/drm/i915/display/g4x_dp.c | 55 ---
 drivers/gpu/drm/i915/display/intel_dpll.c | 37 +++-
 drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 94 ---
 drivers/gpu/drm/i915/gvt/handlers.c   | 19 ++--
 drivers/gpu/drm/i915/i915_reg.h   | 61 ++--
 5 files changed, 113 insertions(+), 153 deletions(-)

-- 
2.34.1



Re: [Intel-gfx] [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread David Laight
From: Christoph Hellwig
> Sent: 07 March 2022 15:57
> 
> On Mon, Mar 07, 2022 at 03:29:35PM +0200, Jarkko Sakkinen wrote:
> > So what would you suggest to sort out the issue? I'm happy to go with
> > ioctl if nothing else is acceptable.
> 
> PLenty of drivers treat all mmaps as if MAP_POPULATE was specified,
> typically by using (io_)remap_pfn_range.  If there any reason to only
> optionally have the pre-fault semantics for sgx?  If not this should
> be really simple.  And if we have a real need for it to be optional
> we'll just need to find a sane way to pass that information to ->mmap.

Is there any space in vma->vm_flags ?

That would be better than an extra argument or function.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)



[Intel-gfx] [PATCH] Revert "drm/i915/edp: Ignore short pulse when panel powered off"

2022-03-07 Thread José Roberto de Souza
This reverts commit 13ea6db2cf24a797ac8c9922e3079fcb897fd32c.

This patch complete broke eDP short pulse handling as VDD is
only enabled when doing aux transactions or when port is disabled.
Checked on several older kernel versions and that is the behavior
that i915 always had on VDD.

So all legit short pulses done by all the eDP panels are being
ignored and no panel interruption errors are being handled.

Still trying to understand why VDD is not always left enabled but
if it can't, those Sharp panels will need another workaround.

Cc: Anshuman Gupta 
Cc: Jani Nikula 
Cc: Uma Shankar 
Signed-off-by: José Roberto de Souza 
---
 drivers/gpu/drm/i915/display/intel_dp.c  |  7 +++
 drivers/gpu/drm/i915/display/intel_pps.c | 13 -
 drivers/gpu/drm/i915/display/intel_pps.h |  1 -
 3 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
b/drivers/gpu/drm/i915/display/intel_dp.c
index 619546441eae5..8ad5788e5375d 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -4866,13 +4866,12 @@ intel_dp_hpd_pulse(struct intel_digital_port *dig_port, 
bool long_hpd)
struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
struct intel_dp *intel_dp = _port->dp;
 
-   if (dig_port->base.type == INTEL_OUTPUT_EDP &&
-   (long_hpd || !intel_pps_have_power(intel_dp))) {
+   if (long_hpd && dig_port->base.type == INTEL_OUTPUT_EDP) {
/*
-* vdd off can generate a long/short pulse on eDP which
+* vdd off can generate a long pulse on eDP which
 * would require vdd on to handle it, and thus we
 * would end up in an endless cycle of
-* "vdd off -> long/short hpd -> vdd on -> detect -> vdd off -> 
..."
+* "vdd off -> long hpd -> vdd on -> detect -> vdd off -> ..."
 */
drm_dbg_kms(>drm,
"ignoring %s hpd on eDP [ENCODER:%d:%s]\n",
diff --git a/drivers/gpu/drm/i915/display/intel_pps.c 
b/drivers/gpu/drm/i915/display/intel_pps.c
index 9c986e8932f87..724947f57664e 100644
--- a/drivers/gpu/drm/i915/display/intel_pps.c
+++ b/drivers/gpu/drm/i915/display/intel_pps.c
@@ -1075,19 +1075,6 @@ static void intel_pps_vdd_sanitize(struct intel_dp 
*intel_dp)
edp_panel_vdd_schedule_off(intel_dp);
 }
 
-bool intel_pps_have_power(struct intel_dp *intel_dp)
-{
-   intel_wakeref_t wakeref;
-   bool have_power = false;
-
-   with_intel_pps_lock(intel_dp, wakeref) {
-   have_power = edp_have_panel_power(intel_dp) &&
- edp_have_panel_vdd(intel_dp);
-   }
-
-   return have_power;
-}
-
 static void pps_init_timestamps(struct intel_dp *intel_dp)
 {
intel_dp->pps.panel_power_off_time = ktime_get_boottime();
diff --git a/drivers/gpu/drm/i915/display/intel_pps.h 
b/drivers/gpu/drm/i915/display/intel_pps.h
index fbb47f6f453e4..799439aba6565 100644
--- a/drivers/gpu/drm/i915/display/intel_pps.h
+++ b/drivers/gpu/drm/i915/display/intel_pps.h
@@ -37,7 +37,6 @@ void intel_pps_vdd_on(struct intel_dp *intel_dp);
 void intel_pps_on(struct intel_dp *intel_dp);
 void intel_pps_off(struct intel_dp *intel_dp);
 void intel_pps_vdd_off_sync(struct intel_dp *intel_dp);
-bool intel_pps_have_power(struct intel_dp *intel_dp);
 void intel_pps_wait_power_cycle(struct intel_dp *intel_dp);
 
 void intel_pps_init(struct intel_dp *intel_dp);
-- 
2.35.1



[Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/mm: Add an iterator to optimally walk over holes suitable for an 
allocation
URL   : https://patchwork.freedesktop.org/series/101123/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/mm: Add an iterator to optimally walk over holes suitable for an 
allocation
URL   : https://patchwork.freedesktop.org/series/101123/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
521ab4ad04ad drm/mm: Add an iterator to optimally walk over holes for an 
allocation (v6)
-:160: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'pos' - possible 
side-effects?
#160: FILE: include/drm/drm_mm.h:430:
+#define drm_mm_for_each_suitable_hole(pos, mm, range_start, range_end, \
+ size, mode) \
+   for (pos = __drm_mm_first_hole(mm, range_start, range_end, size, \
+  (mode) & ~DRM_MM_INSERT_ONCE); \
+pos; \
+pos = (mode) & DRM_MM_INSERT_ONCE ? \
+NULL : __drm_mm_next_hole(mm, pos, size, \
+  (mode) & ~DRM_MM_INSERT_ONCE))

-:160: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'mm' - possible side-effects?
#160: FILE: include/drm/drm_mm.h:430:
+#define drm_mm_for_each_suitable_hole(pos, mm, range_start, range_end, \
+ size, mode) \
+   for (pos = __drm_mm_first_hole(mm, range_start, range_end, size, \
+  (mode) & ~DRM_MM_INSERT_ONCE); \
+pos; \
+pos = (mode) & DRM_MM_INSERT_ONCE ? \
+NULL : __drm_mm_next_hole(mm, pos, size, \
+  (mode) & ~DRM_MM_INSERT_ONCE))

-:160: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'size' - possible 
side-effects?
#160: FILE: include/drm/drm_mm.h:430:
+#define drm_mm_for_each_suitable_hole(pos, mm, range_start, range_end, \
+ size, mode) \
+   for (pos = __drm_mm_first_hole(mm, range_start, range_end, size, \
+  (mode) & ~DRM_MM_INSERT_ONCE); \
+pos; \
+pos = (mode) & DRM_MM_INSERT_ONCE ? \
+NULL : __drm_mm_next_hole(mm, pos, size, \
+  (mode) & ~DRM_MM_INSERT_ONCE))

-:160: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'mode' - possible 
side-effects?
#160: FILE: include/drm/drm_mm.h:430:
+#define drm_mm_for_each_suitable_hole(pos, mm, range_start, range_end, \
+ size, mode) \
+   for (pos = __drm_mm_first_hole(mm, range_start, range_end, size, \
+  (mode) & ~DRM_MM_INSERT_ONCE); \
+pos; \
+pos = (mode) & DRM_MM_INSERT_ONCE ? \
+NULL : __drm_mm_next_hole(mm, pos, size, \
+  (mode) & ~DRM_MM_INSERT_ONCE))

total: 0 errors, 0 warnings, 4 checks, 114 lines checked
eccd97c3fed3 drm/i915/gem: Don't try to map and fence large scanout buffers (v9)




Re: [Intel-gfx] [PATCH] drm/i915/mst: re-enable link training failure fallback for DP MST

2022-03-07 Thread Lyude Paul
On Mon, 2022-03-07 at 22:48 +0200, Ville Syrjälä wrote:
> On Mon, Mar 07, 2022 at 09:36:57PM +0200, Jani Nikula wrote:
> > Commit 80a8cecf62a5 ("drm/i915/dp_mst: Disable link training fallback on
> > MST links") disabled link training failure fallback for DP MST due to
> > the MST manager using the DPCD directly, and generally being ignorant
> > about the possibility of downgrading link parameters. See the commit for
> > further details.
> > 
> > Since then, the max_lane_count and max_link_rate members have been added
> > to struct drm_dp_mst_topology_mgr in commit 98025a62cb00 ("drm/dp_mst:
> > Use Extended Base Receiver Capability DPCD space") and refined in
> > follow-up work.
> > 
> > The members perhaps aren't intended for changing the parameters during
> > the lifetime of the manager, as they're supposed to be passed to
> > drm_dp_mst_topology_mgr_init(). However, the members are only ever used
> > in drm_dp_mst_topology_mgr_set_mst(), and there seems to be nothing to
> > prevent us from adjusting them *before* enabling MST. They wouldn't have
> > an effect if modified while MST is enabled. This is not necessarily
> > pretty, though.
> > 
> > Cc: Nikola Cornij 
> > Cc: Lyude Paul 
> > Cc: Imre Deak 
> > Cc: Ville Syrjälä 
> > Cc: Uma Shankar 
> > Signed-off-by: Jani Nikula 
> > 
> > ---
> > 
> > This is *untested*. I don't see why it wouldn't work, though...
> 
> I don't think we have the required stuff to force a modeset on all
> the streams when the link params change. And the bad link status
> property + uevent stuff is only hooked up to the SST connector
> AFAICS.
> 
> The other major thing missing is a way to reduce the bpp/etc. of
> all the streams to make more room on the link if we have
> insufficient bandwidth. And the more we start to reduce the bw
> the more we're going to hit that and fail the modesets. We already
> ran into regressions due to this when I tried to enable deep color
> for MST.

Yeah, this is why I have been trying to move stuff into the atomic state
because it will make stuff like this a LOT easier. And to be honest, I think
pretty much all of the bandwidth related info in the MST mgr that isn't in
atomic is a hack at this point (I'm definitely not accepting adding any more
props into mgr now). We'll probably also want to consider maybe having a more
complicated link_status API for MST (I was originally going to use the
link_status prop we already have, but I've been realizing that might cause a
lot of problems when initially introducing it since fixing MST link status
errors will likely require disabling all sinks on the link - which userspace
won't understand).

Unfortunately now that I'm back to working on that, I'm stuck on trying to
wrap my head around adjusting amdgpu for these changes <<. I have a WIP branch
with other drivers adjusted if anyone is interested in looking:

https://gitlab.freedesktop.org/lyudess/linux/-/commits/wip/mst-atomic-only-v1

Haven't actually tried it yet on any hardware though

> 
> > this
> > should allow us to downgrade the link to from 128b/132b to 8b/10b if the
> > former fails.
> > 
> > Thoughts? In particular, any objections for messing with the topology
> > manager members directly? Any chance it'll make refactoring the MST code
> > more difficult?
> > ---
> >  drivers/gpu/drm/i915/display/intel_dp.c | 42 ++---
> >  drivers/gpu/drm/i915/display/intel_dp_mst.c |  5 ++-
> >  2 files changed, 23 insertions(+), 24 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > b/drivers/gpu/drm/i915/display/intel_dp.c
> > index 619546441eae..2fad3104b40e 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > @@ -600,15 +600,6 @@ int intel_dp_get_link_train_fallback_values(struct
> > intel_dp *intel_dp,
> > struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > int index;
> >  
> > -   /*
> > -    * TODO: Enable fallback on MST links once MST link compute can
> > handle
> > -    * the fallback params.
> > -    */
> > -   if (intel_dp->is_mst) {
> > -   drm_err(>drm, "Link Training Unsuccessful\n");
> > -   return -1;
> > -   }
> > -
> > if (intel_dp_is_edp(intel_dp) && !intel_dp->use_max_params) {
> > drm_dbg_kms(>drm,
> >     "Retrying Link training for eDP with max
> > parameters\n");
> > @@ -2785,6 +2776,8 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
> > struct drm_i915_private *i915 = dp_to_i915(intel_dp);
> > struct intel_encoder *encoder =
> > _to_dig_port(intel_dp)->base;
> > +   struct drm_dp_mst_topology_mgr *mgr = _dp->mst_mgr;
> > +
> > bool sink_can_mst = drm_dp_read_mst_cap(_dp->aux, intel_dp-
> > >dpcd);
> >  
> > drm_dbg_kms(>drm,
> > @@ -2800,8 +2793,17 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
> > intel_dp->is_mst = sink_can_mst &&
> > 

Re: [Intel-gfx] [PATCH] drm/i915/mst: re-enable link training failure fallback for DP MST

2022-03-07 Thread Ville Syrjälä
On Mon, Mar 07, 2022 at 09:36:57PM +0200, Jani Nikula wrote:
> Commit 80a8cecf62a5 ("drm/i915/dp_mst: Disable link training fallback on
> MST links") disabled link training failure fallback for DP MST due to
> the MST manager using the DPCD directly, and generally being ignorant
> about the possibility of downgrading link parameters. See the commit for
> further details.
> 
> Since then, the max_lane_count and max_link_rate members have been added
> to struct drm_dp_mst_topology_mgr in commit 98025a62cb00 ("drm/dp_mst:
> Use Extended Base Receiver Capability DPCD space") and refined in
> follow-up work.
> 
> The members perhaps aren't intended for changing the parameters during
> the lifetime of the manager, as they're supposed to be passed to
> drm_dp_mst_topology_mgr_init(). However, the members are only ever used
> in drm_dp_mst_topology_mgr_set_mst(), and there seems to be nothing to
> prevent us from adjusting them *before* enabling MST. They wouldn't have
> an effect if modified while MST is enabled. This is not necessarily
> pretty, though.
> 
> Cc: Nikola Cornij 
> Cc: Lyude Paul 
> Cc: Imre Deak 
> Cc: Ville Syrjälä 
> Cc: Uma Shankar 
> Signed-off-by: Jani Nikula 
> 
> ---
> 
> This is *untested*. I don't see why it wouldn't work, though...

I don't think we have the required stuff to force a modeset on all
the streams when the link params change. And the bad link status
property + uevent stuff is only hooked up to the SST connector
AFAICS.

The other major thing missing is a way to reduce the bpp/etc. of
all the streams to make more room on the link if we have
insufficient bandwidth. And the more we start to reduce the bw
the more we're going to hit that and fail the modesets. We already
ran into regressions due to this when I tried to enable deep color
for MST.

> this
> should allow us to downgrade the link to from 128b/132b to 8b/10b if the
> former fails.
> 
> Thoughts? In particular, any objections for messing with the topology
> manager members directly? Any chance it'll make refactoring the MST code
> more difficult?
> ---
>  drivers/gpu/drm/i915/display/intel_dp.c | 42 ++---
>  drivers/gpu/drm/i915/display/intel_dp_mst.c |  5 ++-
>  2 files changed, 23 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
> b/drivers/gpu/drm/i915/display/intel_dp.c
> index 619546441eae..2fad3104b40e 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -600,15 +600,6 @@ int intel_dp_get_link_train_fallback_values(struct 
> intel_dp *intel_dp,
>   struct drm_i915_private *i915 = dp_to_i915(intel_dp);
>   int index;
>  
> - /*
> -  * TODO: Enable fallback on MST links once MST link compute can handle
> -  * the fallback params.
> -  */
> - if (intel_dp->is_mst) {
> - drm_err(>drm, "Link Training Unsuccessful\n");
> - return -1;
> - }
> -
>   if (intel_dp_is_edp(intel_dp) && !intel_dp->use_max_params) {
>   drm_dbg_kms(>drm,
>   "Retrying Link training for eDP with max 
> parameters\n");
> @@ -2785,6 +2776,8 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
>   struct drm_i915_private *i915 = dp_to_i915(intel_dp);
>   struct intel_encoder *encoder =
>   _to_dig_port(intel_dp)->base;
> + struct drm_dp_mst_topology_mgr *mgr = _dp->mst_mgr;
> +
>   bool sink_can_mst = drm_dp_read_mst_cap(_dp->aux, intel_dp->dpcd);
>  
>   drm_dbg_kms(>drm,
> @@ -2800,8 +2793,17 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
>   intel_dp->is_mst = sink_can_mst &&
>   i915->params.enable_dp_mst;
>  
> - drm_dp_mst_topology_mgr_set_mst(_dp->mst_mgr,
> - intel_dp->is_mst);
> + /*
> +  * Set the source max lane count and link rate using the possibly
> +  * limited values due to failed link training.
> +  *
> +  * This is a bit hackish, as the values are supposed to be passed to
> +  * drm_dp_mst_topology_mgr_init().
> +  */
> + mgr->max_lane_count = intel_dp->max_link_lane_count;
> + mgr->max_link_rate = intel_dp->max_link_rate;
> +
> + drm_dp_mst_topology_mgr_set_mst(mgr, intel_dp->is_mst);
>  }
>  
>  static bool
> @@ -4472,23 +4474,19 @@ intel_dp_detect(struct drm_connector *connector,
>   goto out;
>   }
>  
> - /* Read DP Sink DSC Cap DPCD regs for DP v1.4 */
> - if (DISPLAY_VER(dev_priv) >= 11)
> - intel_dp_get_dsc_sink_cap(intel_dp);
> -
> - intel_dp_configure_mst(intel_dp);
> -
> - /*
> -  * TODO: Reset link params when switching to MST mode, until MST
> -  * supports link training fallback params.
> -  */
> - if (intel_dp->reset_link_params || intel_dp->is_mst) {
> + if (intel_dp->reset_link_params) {
>   intel_dp_reset_max_link_params(intel_dp);
>   intel_dp->reset_link_params = false;
>   }
>  

[Intel-gfx] ✗ Fi.CI.DOCS: warning for drm/doc/rfc: i915 VM_BIND feature design + uapi (rev2)

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/doc/rfc: i915 VM_BIND feature design + uapi (rev2)
URL   : https://patchwork.freedesktop.org/series/93447/
State : warning

== Summary ==

$ make htmldocs 2>&1 > /dev/null | grep i915
/home/cidrm/kernel/Documentation/gpu/rfc/i915_vm_bind.rst:31: WARNING: 
Unexpected indentation.
/home/cidrm/kernel/Documentation/gpu/rfc/i915_vm_bind.rst:32: WARNING: Block 
quote ends without a blank line; unexpected unindent.




[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/doc/rfc: i915 VM_BIND feature design + uapi (rev2)

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/doc/rfc: i915 VM_BIND feature design + uapi (rev2)
URL   : https://patchwork.freedesktop.org/series/93447/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
a40e87e2a2f3 drm/doc/rfc: VM_BIND feature design document
-:11: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does 
MAINTAINERS need updating?
#11: 
new file mode 100644

-:16: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier 
tag in line 1
#16: FILE: Documentation/gpu/rfc/i915_vm_bind.rst:1:
+==

-:112: WARNING:TYPO_SPELLING: 'an user' may be misspelled - perhaps 'a user'?
#112: FILE: Documentation/gpu/rfc/i915_vm_bind.rst:97:
+wakeup the waiting process. User can wait on an user fence with the
  ^^^

-:117: WARNING:TYPO_SPELLING: 'an user' may be misspelled - perhaps 'a user'?
#117: FILE: Documentation/gpu/rfc/i915_vm_bind.rst:102:
+precision on the wakeup. Each batch can signal an user fence to indicate
^^^

total: 0 errors, 4 warnings, 0 checks, 217 lines checked
95969abb7e7c drm/doc/rfc: VM_BIND uapi definition
-:11: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does 
MAINTAINERS need updating?
#11: 
new file mode 100644

-:29: WARNING:LONG_LINE: line length of 126 exceeds 100 columns
#29: FILE: Documentation/gpu/rfc/i915_vm_bind.h:14:
+#define DRM_IOCTL_I915_GEM_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GEM_VM_BIND, struct drm_i915_gem_vm_bind)

-:30: WARNING:LONG_LINE: line length of 128 exceeds 100 columns
#30: FILE: Documentation/gpu/rfc/i915_vm_bind.h:15:
+#define DRM_IOCTL_I915_GEM_VM_UNBIND   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GEM_VM_UNBIND, struct drm_i915_gem_vm_bind)

-:31: WARNING:LONG_LINE: line length of 142 exceeds 100 columns
#31: FILE: Documentation/gpu/rfc/i915_vm_bind.h:16:
+#define DRM_IOCTL_I915_GEM_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GEM_WAIT_USER_FENCE, struct drm_i915_gem_wait_user_fence)

-:129: CHECK:LINE_SPACING: Please don't use multiple blank lines
#129: FILE: Documentation/gpu/rfc/i915_vm_bind.h:114:
+
+

-:135: CHECK:LINE_SPACING: Please don't use multiple blank lines
#135: FILE: Documentation/gpu/rfc/i915_vm_bind.h:120:
+
+

total: 0 errors, 4 warnings, 2 checks, 176 lines checked




[Intel-gfx] [PATCH v6 2/2] drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

2022-03-07 Thread Vivek Kasireddy
On platforms capable of allowing 8K (7680 x 4320) modes, pinning 2 or
more framebuffers/scanout buffers results in only one that is mappable/
fenceable. Therefore, pageflipping between these 2 FBs where only one
is mappable/fenceable creates latencies large enough to miss alternate
vblanks thereby producing less optimal framerate.

This mainly happens because when i915_gem_object_pin_to_display_plane()
is called to pin one of the FB objs, the associated vma is identified
as misplaced and therefore i915_vma_unbind() is called which unbinds and
evicts it. This misplaced vma gets subseqently pinned only when
i915_gem_object_ggtt_pin_ww() is called without PIN_MAPPABLE. This
results in a latency of ~10ms and happens every other vblank/repaint cycle.
Therefore, to fix this issue, we try to see if there is space to map
at-least two objects of a given size and return early if there isn't. This
would ensure that we do not try with PIN_MAPPABLE for any objects that
are too big to map thereby preventing unnecessary unbind.

Testcase:
Running Weston and weston-simple-egl on an Alderlake_S (ADLS) platform
with a 8K@60 mode results in only ~40 FPS. Since upstream Weston submits
a frame ~7ms before the next vblank, the latencies seen between atomic
commit and flip event are 7, 24 (7 + 16.66), 7, 24. suggesting that
it misses the vblank every other frame.

Here is the ftrace snippet that shows the source of the ~10ms latency:
  i915_gem_object_pin_to_display_plane() {
0.102 us   |i915_gem_object_set_cache_level();
i915_gem_object_ggtt_pin_ww() {
0.390 us   |  i915_vma_instance();
0.178 us   |  i915_vma_misplaced();
  i915_vma_unbind() {
  __i915_active_wait() {
0.082 us   |i915_active_acquire_if_busy();
0.475 us   |  }
  intel_runtime_pm_get() {
0.087 us   |intel_runtime_pm_acquire();
0.259 us   |  }
  __i915_active_wait() {
0.085 us   |i915_active_acquire_if_busy();
0.240 us   |  }
  __i915_vma_evict() {
ggtt_unbind_vma() {
  gen8_ggtt_clear_range() {
10507.255 us |}
10507.689 us |  }
10508.516 us |   }

v2: Instead of using bigjoiner checks, determine whether a scanout
buffer is too big by checking to see if it is possible to map
two of them into the ggtt.

v3 (Ville):
- Count how many fb objects can be fit into the available holes
  instead of checking for a hole twice the object size.
- Take alignment constraints into account.
- Limit this large scanout buffer check to >= Gen 11 platforms.

v4:
- Remove existing heuristic that checks just for size. (Ville)
- Return early if we find space to map at-least two objects. (Tvrtko)
- Slightly update the commit message.

v5: (Tvrtko)
- Rename the function to indicate that the object may be too big to
  map into the aperture.
- Account for guard pages while calculating the total size required
  for the object.
- Do not subject all objects to the heuristic check and instead
  consider objects only of a certain size.
- Do the hole walk using the rbtree.
- Preserve the existing PIN_NONBLOCK logic.
- Drop the PIN_MAPPABLE check while pinning the VMA.

v6: (Tvrtko)
- Return 0 on success and the specific error code on failure to
  preserve the existing behavior.

v7: (Ville)
- Drop the HAS_GMCH(i915), DISPLAY_VER(i915) < 11 and
  size < ggtt->mappable_end / 4 checks.
- Drop the redundant check that is based on previous heuristic.

v8:
- Make sure that we are holding the mutex associated with ggtt vm
  as we traverse the hole nodes.

v9: (Tvrtko)
- Use mutex_lock_interruptible_nested() instead of mutex_lock().

Cc: Ville Syrjälä 
Cc: Maarten Lankhorst 
Cc: Tvrtko Ursulin 
Cc: Manasi Navare 
Reviewed-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 1 file changed, 94 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9747924cc57b..e0d731b3f215 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,7 @@
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
+#include "gem/i915_gem_tiling.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -882,6 +883,96 @@ static void discard_ggtt_vma(struct i915_vma *vma)
spin_unlock(>vma.lock);
 }
 
+static int
+i915_gem_object_fits_in_aperture(struct drm_i915_gem_object *obj,
+u64 alignment, u64 flags)
+{
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+   struct drm_mm_node *hole;
+   u64 hole_start, hole_end, start, end;
+   u64 fence_size, fence_alignment;
+   unsigned int count = 0;
+   int err = 0;
+
+   /*
+* If the 

[Intel-gfx] [PATCH v6 1/2] drm/mm: Add an iterator to optimally walk over holes for an allocation (v6)

2022-03-07 Thread Vivek Kasireddy
This iterator relies on drm_mm_first_hole() and drm_mm_next_hole()
functions to identify suitable holes for an allocation of a given
size by efficiently traversing the rbtree associated with the given
allocator.

It replaces the for loop in drm_mm_insert_node_in_range() and can
also be used by drm drivers to quickly identify holes of a certain
size within a given range.

v2: (Tvrtko)
- Prepend a double underscore for the newly exported first/next_hole
- s/each_best_hole/each_suitable_hole/g
- Mask out DRM_MM_INSERT_ONCE from the mode before calling
  first/next_hole and elsewhere.

v3: (Tvrtko)
- Reduce the number of hunks by retaining the "mode" variable name

v4:
- Typo: s/__drm_mm_next_hole(.., hole/__drm_mm_next_hole(.., pos

v5: (Tvrtko)
- Fixed another typo: should pass caller_mode instead of mode to
  the iterator in drm_mm_insert_node_in_range().

v6: (Tvrtko)
- Fix the checkpatch warning that warns about precedence issues.

Reviewed-by: Tvrtko Ursulin 
Acked-by: Christian König 
Suggested-by: Tvrtko Ursulin 
Signed-off-by: Vivek Kasireddy 
---
 drivers/gpu/drm/drm_mm.c | 32 +++-
 include/drm/drm_mm.h | 36 
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8257f9d4f619..6ff98a0e4df3 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -352,10 +352,10 @@ static struct drm_mm_node *find_hole_addr(struct drm_mm 
*mm, u64 addr, u64 size)
return node;
 }
 
-static struct drm_mm_node *
-first_hole(struct drm_mm *mm,
-  u64 start, u64 end, u64 size,
-  enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -374,6 +374,7 @@ first_hole(struct drm_mm *mm,
hole_stack);
}
 }
+EXPORT_SYMBOL(__drm_mm_first_hole);
 
 /**
  * DECLARE_NEXT_HOLE_ADDR - macro to declare next hole functions
@@ -410,11 +411,11 @@ static struct drm_mm_node *name(struct drm_mm_node 
*entry, u64 size)  \
 DECLARE_NEXT_HOLE_ADDR(next_hole_high_addr, rb_left, rb_right)
 DECLARE_NEXT_HOLE_ADDR(next_hole_low_addr, rb_right, rb_left)
 
-static struct drm_mm_node *
-next_hole(struct drm_mm *mm,
- struct drm_mm_node *node,
- u64 size,
- enum drm_mm_insert_mode mode)
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode)
 {
switch (mode) {
default:
@@ -432,6 +433,7 @@ next_hole(struct drm_mm *mm,
return >hole_stack == >hole_stack ? NULL : node;
}
 }
+EXPORT_SYMBOL(__drm_mm_next_hole);
 
 /**
  * drm_mm_reserve_node - insert an pre-initialized node
@@ -516,11 +518,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
u64 size, u64 alignment,
unsigned long color,
u64 range_start, u64 range_end,
-   enum drm_mm_insert_mode mode)
+   enum drm_mm_insert_mode caller_mode)
 {
struct drm_mm_node *hole;
u64 remainder_mask;
-   bool once;
+   enum drm_mm_insert_mode mode = caller_mode & ~DRM_MM_INSERT_ONCE;
 
DRM_MM_BUG_ON(range_start > range_end);
 
@@ -533,13 +535,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
if (alignment <= 1)
alignment = 0;
 
-   once = mode & DRM_MM_INSERT_ONCE;
-   mode &= ~DRM_MM_INSERT_ONCE;
-
remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
-   for (hole = first_hole(mm, range_start, range_end, size, mode);
-hole;
-hole = once ? NULL : next_hole(mm, hole, size, mode)) {
+   drm_mm_for_each_suitable_hole(hole, mm, range_start, range_end,
+ size, caller_mode) {
u64 hole_start = __drm_mm_hole_node_start(hole);
u64 hole_end = hole_start + hole->hole_size;
u64 adj_start, adj_end;
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..896754fa6d69 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -400,6 +400,42 @@ static inline u64 drm_mm_hole_node_end(const struct 
drm_mm_node *hole_node)
 1 : 0; \
 pos = list_next_entry(pos, hole_stack))
 
+struct drm_mm_node *
+__drm_mm_first_hole(struct drm_mm *mm,
+   u64 start, u64 end, u64 size,
+   enum drm_mm_insert_mode mode);
+
+struct drm_mm_node *
+__drm_mm_next_hole(struct drm_mm *mm,
+  struct drm_mm_node *node,
+  u64 size,
+  enum drm_mm_insert_mode mode);
+
+/**

[Intel-gfx] [PATCH v6 0/2] drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-03-07 Thread Vivek Kasireddy
The first patch is a drm core patch that replaces the for loop in
drm_mm_insert_node_in_range() with the iterator and would not
cause any functional changes. The second patch is a i915 driver
specific patch that also uses the iterator but solves a different
problem.

v2:
- Added a new patch to this series to fix a potential NULL
  dereference.
- Fixed a typo associated with the iterator introduced in the
  drm core patch.
- Added locking around the snippet in the i915 patch that
  traverses the GGTT hole nodes.

v3: (Tvrtko)
- Replaced mutex_lock with mutex_lock_interruptible_nested() in
  the i915 patch.

v4: (Tvrtko)
- Dropped the patch added in v2 as it was deemed unnecessary.

v5: (Tvrtko)
- Fixed yet another typo in the drm core patch: should have
  passed caller_mode instead of mode to the iterator.

v6: (Tvrtko)
- Fixed the checkpatch warning that warns about precedence issues.

Cc: Tvrtko Ursulin 
Cc: Nirmoy Das 
Cc: Christian König 

Vivek Kasireddy (2):
  drm/mm: Add an iterator to optimally walk over holes for an allocation
(v6)
  drm/i915/gem: Don't try to map and fence large scanout buffers (v9)

 drivers/gpu/drm/drm_mm.c|  32 
 drivers/gpu/drm/i915/i915_gem.c | 128 +++-
 include/drm/drm_mm.h|  36 +
 3 files changed, 145 insertions(+), 51 deletions(-)

-- 
2.35.1



[Intel-gfx] [RFC v2 2/2] drm/doc/rfc: VM_BIND uapi definition

2022-03-07 Thread Niranjana Vishwanathapura
VM_BIND and related uapi definitions

Signed-off-by: Niranjana Vishwanathapura 
---
 Documentation/gpu/rfc/i915_vm_bind.h | 176 +++
 1 file changed, 176 insertions(+)
 create mode 100644 Documentation/gpu/rfc/i915_vm_bind.h

diff --git a/Documentation/gpu/rfc/i915_vm_bind.h 
b/Documentation/gpu/rfc/i915_vm_bind.h
new file mode 100644
index ..80f00ee6c8a1
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_vm_bind.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+/* VM_BIND feature availability through drm_i915_getparam */
+#define I915_PARAM_HAS_VM_BIND 57
+
+/* VM_BIND related ioctls */
+#define DRM_I915_GEM_VM_BIND   0x3d
+#define DRM_I915_GEM_VM_UNBIND 0x3e
+#define DRM_I915_GEM_WAIT_USER_FENCE   0x3f
+
+#define DRM_IOCTL_I915_GEM_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GEM_VM_BIND, struct drm_i915_gem_vm_bind)
+#define DRM_IOCTL_I915_GEM_VM_UNBIND   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GEM_VM_UNBIND, struct drm_i915_gem_vm_bind)
+#define DRM_IOCTL_I915_GEM_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GEM_WAIT_USER_FENCE, struct drm_i915_gem_wait_user_fence)
+
+/**
+ * struct drm_i915_gem_vm_bind - VA to object/buffer mapping to [un]bind.
+ */
+struct drm_i915_gem_vm_bind {
+   /** vm to [un]bind */
+   __u32 vm_id;
+
+   /**
+* BO handle or file descriptor.
+* 'fd' value of -1 is reserved for system pages (SVM)
+*/
+   union {
+   __u32 handle; /* For unbind, it is reserved and must be 0 */
+   __s32 fd;
+   }
+
+   /** VA start to [un]bind */
+   __u64 start;
+
+   /** Offset in object to [un]bind */
+   __u64 offset;
+
+   /** VA length to [un]bind */
+   __u64 length;
+
+   /** Flags */
+   __u64 flags;
+   /** Bind the mapping immediately instead of during next submission */
+#define I915_GEM_VM_BIND_IMMEDIATE   (1 << 0)
+   /** Read-only mapping */
+#define I915_GEM_VM_BIND_READONLY(1 << 1)
+   /** Capture this mapping in the dump upon GPU error */
+#define I915_GEM_VM_BIND_CAPTURE (1 << 2)
+
+   /** Zero-terminated chain of extensions */
+   __u64 extensions;
+};
+
+/**
+ * struct drm_i915_vm_bind_ext_user_fence - Bind completion signaling 
extension.
+ */
+struct drm_i915_vm_bind_ext_user_fence {
+#define I915_VM_BIND_EXT_USER_FENCE0
+   /** @base: Extension link. See struct i915_user_extension. */
+   struct i915_user_extension base;
+
+   /** User/Memory fence qword aligned process virtual address */
+   __u64 addr;
+
+   /** User/Memory fence value to be written after bind completion */
+   __u64 val;
+
+   /** Reserved for future extensions */
+   __u64 rsvd;
+};
+
+/**
+ * struct drm_i915_gem_execbuffer_ext_user_fence - First level batch completion
+ * signaling extension.
+ *
+ * This extension allows user to attach a user fence ( pair) to an
+ * execbuf to be signaled by the command streamer after the completion of 1st
+ * level batch, by writing the  at specified  and triggering an
+ * interrupt.
+ * User can either poll for this user fence to signal or can also wait on it
+ * with i915_gem_wait_user_fence ioctl.
+ * This is very much useful for long running contexts where waiting on 
dma-fence
+ * by user (like i915_gem_wait ioctl) is not supported.
+ */
+struct drm_i915_gem_execbuffer_ext_user_fence {
+#define DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE 0
+   /** @base: Extension link. See struct i915_user_extension. */
+   struct i915_user_extension base;
+
+   /**
+* User/Memory fence qword aligned GPU virtual address.
+* Address has to be a valid GPU virtual address at the time of
+* 1st level batch completion.
+*/
+   __u64 addr;
+
+   /**
+* User/Memory fence Value to be written to above address
+* after 1st level batch completes.
+*/
+   __u64 value;
+
+   /** Reserved for future extensions */
+   __u64 rsvd;
+};
+
+struct drm_i915_gem_vm_control {
+/** Flag to opt-in for VM_BIND mode of binding during VM creation */
+#define I915_VM_CREATE_FLAGS_USE_VM_BIND   (1 << 0)
+};
+
+
+struct drm_i915_gem_create_ext {
+/** Extension to make the object private to a specified VM */
+#define I915_GEM_CREATE_EXT_VM_PRIVATE 2
+};
+
+
+struct prelim_drm_i915_gem_context_create_ext {
+/** Flag to declare context as long running */
+#define I915_CONTEXT_CREATE_FLAGS_LONG_RUNNING   (1u << 2)
+};
+
+/**
+ * struct drm_i915_gem_wait_user_fence
+ *
+ * Wait on user/memory fence. User/Memory fence can be woken up either by,
+ *1. GPU context indicated by 'ctx_id', or,
+ *2. Kernel driver async worker upon I915_UFENCE_WAIT_SOFT.
+ *   'ctx_id' is ignored when this flag is set.
+ *
+ * Wakeup when below condition is true.
+ * (*addr & MASK) OP (VALUE & MASK)
+ *
+ */
+~struct 

[Intel-gfx] [RFC v2 0/2] drm/doc/rfc: i915 VM_BIND feature design + uapi

2022-03-07 Thread Niranjana Vishwanathapura
This is the i915 driver VM_BIND feature design RFC patch series along
with the required uapi definition and description of intended use cases.

v2: Updated design and uapi, more documentation.

Signed-off-by: Niranjana Vishwanathapura 

Niranjana Vishwanathapura (2):
  drm/doc/rfc: VM_BIND feature design document
  drm/doc/rfc: VM_BIND uapi definition

 Documentation/gpu/rfc/i915_vm_bind.h   | 176 +
 Documentation/gpu/rfc/i915_vm_bind.rst | 210 +
 Documentation/gpu/rfc/index.rst|   4 +
 3 files changed, 390 insertions(+)
 create mode 100644 Documentation/gpu/rfc/i915_vm_bind.h
 create mode 100644 Documentation/gpu/rfc/i915_vm_bind.rst

-- 
2.21.0.rc0.32.g243a4c7e27



[Intel-gfx] [RFC v2 1/2] drm/doc/rfc: VM_BIND feature design document

2022-03-07 Thread Niranjana Vishwanathapura
VM_BIND design document with description of intended use cases.

Signed-off-by: Niranjana Vishwanathapura 
---
 Documentation/gpu/rfc/i915_vm_bind.rst | 210 +
 Documentation/gpu/rfc/index.rst|   4 +
 2 files changed, 214 insertions(+)
 create mode 100644 Documentation/gpu/rfc/i915_vm_bind.rst

diff --git a/Documentation/gpu/rfc/i915_vm_bind.rst 
b/Documentation/gpu/rfc/i915_vm_bind.rst
new file mode 100644
index ..cdc6bb25b942
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_vm_bind.rst
@@ -0,0 +1,210 @@
+==
+I915 VM_BIND feature design and use cases
+==
+
+VM_BIND feature
+
+DRM_I915_GEM_VM_BIND/UNBIND ioctls allows UMD to bind/unbind GEM buffer
+objects (BOs) or sections of a BOs at specified GPU virtual addresses on
+a specified address space (VM).
+
+These mappings (also referred to as persistent mappings) will be persistent
+across multiple GPU submissions (execbuff) issued by the UMD, without user
+having to provide a list of all required mappings during each submission
+(as required by older execbuff mode).
+
+VM_BIND ioctl defers binding the mappings until next execbuff submission
+where it will be required, or immediately if I915_GEM_VM_BIND_IMMEDIATE
+flag is set (useful if mapping is required for an active context).
+
+VM_BIND feature is advertised to user via I915_PARAM_HAS_VM_BIND.
+User has to opt-in for VM_BIND mode of binding for an address space (VM)
+during VM creation time via I915_VM_CREATE_FLAGS_USE_VM_BIND extension.
+A VM in VM_BIND mode will not support older execbuff mode of binding.
+
+UMDs can still send BOs of these persistent mappings in execlist of execbuff
+for specifying BO dependencies (implicit fencing) and to use BO as a batch,
+but those BOs should be mapped ahead via vm_bind ioctl.
+
+VM_BIND features include,
+- Multiple Virtual Address (VA) mappings can map to the same physical pages
+  of an object (aliasing).
+- VA mapping can map to a partial section of the BO (partial binding).
+- Support capture of persistent mappings in the dump upon GPU error.
+- TLB is flushed upon unbind completion. Batching of TLB flushes in some
+  usecases will be helpful.
+- Asynchronous vm_bind and vm_unbind support.
+- VM_BIND uses user/memory fence mechanism for signaling bind completion
+  and for signaling batch completion in long running contexts (explained
+  below).
+
+VM_PRIVATE objects
+--
+By default, BOs can be mapped on multiple VMs and can also be dma-buf
+exported. Hence these BOs are referred to as Shared BOs.
+During each execbuff submission, the request fence must be added to the
+dma-resv fence list of all shared BOs mapped on the VM.
+
+VM_BIND feature introduces an optimization where user can create BO which
+is private to a specified VM via I915_GEM_CREATE_EXT_VM_PRIVATE flag during
+BO creation. Unlike Shared BOs, these VM private BOs can only be mapped on
+the VM they are private to and can't be dma-buf exported.
+All private BOs of a VM share the dma-resv object. Hence during each execbuff
+submission, they need only one dma-resv fence list updated. Thus the fast
+path (where required mappings are already bound) submission latency is O(1)
+w.r.t the number of VM private BOs.
+
+VM_BIND locking hierarchy
+-
+VM_BIND locking order is as below.
+
+1) A vm_bind mutex will protect vm_bind lists. This lock is taken in vm_bind/
+   vm_unbind ioctl calls, in the execbuff path and while releasing the mapping.
+
+   In future, when GPU page faults are supported, we can potentially use a
+   rwsem instead, so that multiple pagefault handlers can take the read side
+   lock to lookup the mapping and hence can run in parallel.
+
+2) The BO's dma-resv lock will protect i915_vma state and needs to be held
+   while binding a vma and while updating dma-resv fence list of a BO.
+   The private BOs of a VM will all share a dma-resv object.
+
+   This lock is held in vm_bind call for immediate binding, during vm_unbind
+   call for unbinding and during execbuff path for binding the mapping and
+   updating the dma-resv fence list of the BO.
+
+3) Spinlock/s to protect some of the VM's lists.
+
+We will also need support for bulk LRU movement of persistent mappings to
+avoid additional latencies in execbuff path.
+
+GPU page faults
+
+Both older execbuff mode and the newer VM_BIND mode of binding will require
+using dma-fence to ensure residency.
+In future when GPU page faults are supported, no dma-fence usage is required
+as residency is purely managed by installing and removing/invalidating ptes.
+
+
+User/Memory Fence
+==
+The idea is to take a user specified virtual address and install an interrupt
+handler to wake up the current task when the memory location passes the user
+supplied filter.
+
+User/Memory fence is a <address, value> pair. To signal the user fence,
+specified value will 

Re: [Intel-gfx] [PATCH v5 4/7] drm/i915/gt: create per-tile sysfs interface

2022-03-07 Thread Andrzej Hajda




On 07.03.2022 00:04, Andi Shyti wrote:

Hi Andrzej,

[...]


+bool is_object_gt(struct kobject *kobj)
+{
+   return !strncmp(kobj->name, "gt", 2);
+}

It looks quite fragile, at the moment I do not have better idea:) maybe
after reviewing the rest of the patches.

yeah... it's not pretty, I agree, but I couldn't come up with a
better way of doing it.


+static struct intel_gt *kobj_to_gt(struct kobject *kobj)
+{
+   return container_of(kobj, struct kobj_gt, base)->gt;
+}
+
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+   const char *name)
+{
+   struct kobject *kobj = >kobj;
+
+   /*
+* We are interested at knowing from where the interface
+* has been called, whether it's called from gt/ or from
+* the parent directory.
+* From the interface position it depends also the value of
+* the private data.
+* If the interface is called from gt/ then private data is
+* of the "struct intel_gt *" type, otherwise it's * a
+* "struct drm_i915_private *" type.
+*/
+   if (!is_object_gt(kobj)) {
+   struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
+
+   pr_devel_ratelimited(DEPRECATED
+   "%s (pid %d) is accessing deprecated %s "
+   "sysfs control, please use gt/gt/%s instead\n",
+   current->comm, task_pid_nr(current), name, name);
+   return to_gt(i915);
+   }
+
+   return kobj_to_gt(kobj);

It took some time for me to understand what is going on here.
We have dev argument which sometimes can point to "struct device", sometimes
to "struct kobj_gt", but it's type suggests differently, quite ugly.
I wonder if wouldn't be better to use __ATTR instead of DEVICE_ATTR* as in
case of intel_engines_add_sysfs. This way abstractions would look better,
hopefully.

How would it help?

The difference is that I'm adding twice different interfaces with
the same name and different location (i.e. different object). The
legacy intrefaces inherit the object from drm and I'm preserving
that reference.

While the new objects would derive from the previous and they are
pretty much like intel_engines_add_sysfs().


I was not clear on the issue. Here in case of 'id' attribute it is 
defined as device_attribute, but in kobj_type.sysfs_ops you assign 
formally incompatible _sysfs_ops.
kobj_sysfs_ops expects kobj_attribute! Fortunately kobj_attribute is 
'binary compatible' with device_attribute and kobj is at beginning of 
struct device as well, so it does not blow up, but I wouldn't say it is 
clean solution :)
If you look at intel_engines_add_sysfs you can see that all attributes 
are defined as kobj_attribute.


Regards
Andrzej



[...]


+struct kobject *
+intel_gt_create_kobj(struct intel_gt *gt, struct kobject *dir, const char 
*name)
+{
+   struct kobj_gt *kg;
+
+   kg = kzalloc(sizeof(*kg), GFP_KERNEL);
+   if (!kg)
+   return NULL;
+
+   kobject_init(>base, _gt_type);
+   kg->gt = gt;
+
+   /* xfer ownership to sysfs tree */
+   if (kobject_add(>base, dir, "%s", name)) {
+   kobject_put(>base);
+   return NULL;
+   }
+
+   return >base; /* borrowed ref */
+}
+
+void intel_gt_sysfs_register(struct intel_gt *gt)
+{
+   struct kobject *dir;
+   char name[80];
+
+   snprintf(name, sizeof(name), "gt%d", gt->info.id);
+
+   dir = intel_gt_create_kobj(gt, gt->i915->sysfs_gt, name);
+   if (!dir) {
+   drm_warn(>i915->drm,
+"failed to initialize %s sysfs root\n", name);
+   return;
+   }
+}

Squashing intel_gt_create_kobj into intel_gt_sysfs_register would simplify
code and allows drop snprintf to local array.

right!


+static struct kobject *i915_setup_gt_sysfs(struct kobject *parent)
+{
+   return kobject_create_and_add("gt", parent);
+}
+
   void i915_setup_sysfs(struct drm_i915_private *dev_priv)
   {
struct device *kdev = dev_priv->drm.primary->kdev;
@@ -538,6 +543,11 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
if (ret)
drm_err(_priv->drm, "RPS sysfs setup failed\n");
+   dev_priv->sysfs_gt = i915_setup_gt_sysfs(>kobj);

Why not directly kobject_create_and_add("gt", parent) ? up to you.

of course!

[...]

Thanks a lot for the review,
Andi




Re: [Intel-gfx] [PATCH v7] drm/i915/display/vrr: Reset VRR capable property on a long hpd

2022-03-07 Thread Navare, Manasi
Hi Ville,

Here the VRR set/reset moved to set/unset edid like you suggested.
Anything else needed here?

Manasi

On Thu, Mar 03, 2022 at 03:32:22PM -0800, Manasi Navare wrote:
> With some VRR panels, user can turn VRR ON/OFF on the fly from the panel 
> settings.
> When VRR is turned OFF, the panel sends a long HPD to the driver clearing the Ignore 
> MSA bit
> in the DPCD. Currently the driver parses that on every HPD but fails to reset
> the corresponding VRR Capable Connector property.
> Hence the userspace still sees this as VRR Capable panel which is incorrect.
> 
> Fix this by explicitly resetting the connector property.
> 
> v2: Reset vrr capable if status == connector_disconnected
> v3: Use i915 and use bool vrr_capable (Jani Nikula)
> v4: Move vrr_capable to after update modes call (Jani N)
> Remove the redundant comment (Jan N)
> v5: Fixes the regression on older platforms by reseting the VRR
> only if HAS_VRR
> v6: Remove the checks from driver, add in drm core before
> setting VRR prop (Ville)
> v7: Move VRR set/reset to set/unset_edid (Ville)
> 
> Cc: Jani Nikula 
> Cc: Ville Syrjälä 
> Fixes: 390a1f8beb87 ("Revert "drm/i915/display/vrr: Reset VRR capable 
> property on a long hpd")
> Signed-off-by: Manasi Navare 
> ---
>  drivers/gpu/drm/i915/display/intel_dp.c | 17 +++--
>  1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
> b/drivers/gpu/drm/i915/display/intel_dp.c
> index d6ef33096bb6..1d0f8fc39005 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -4385,13 +4385,20 @@ intel_dp_update_420(struct intel_dp *intel_dp)
>  static void
>  intel_dp_set_edid(struct intel_dp *intel_dp)
>  {
> + struct drm_i915_private *i915 = dp_to_i915(intel_dp);
>   struct intel_connector *connector = intel_dp->attached_connector;
>   struct edid *edid;
> + bool vrr_capable;
>  
>   intel_dp_unset_edid(intel_dp);
>   edid = intel_dp_get_edid(intel_dp);
>   connector->detect_edid = edid;
>  
> + vrr_capable = intel_vrr_is_capable(>base);
> + drm_dbg_kms(>drm, "[CONNECTOR:%d:%s] VRR capable: %s\n",
> + connector->base.base.id, connector->base.name, 
> str_yes_no(vrr_capable));
> + drm_connector_set_vrr_capable_property(>base, vrr_capable);
> +
>   intel_dp_update_dfp(intel_dp, edid);
>   intel_dp_update_420(intel_dp);
>  
> @@ -4424,6 +4431,9 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
>  
>   intel_dp->dfp.ycbcr_444_to_420 = false;
>   connector->base.ycbcr_420_allowed = false;
> +
> + drm_connector_set_vrr_capable_property(>base,
> +false);
>  }
>  
>  static int
> @@ -4574,14 +4584,9 @@ static int intel_dp_get_modes(struct drm_connector 
> *connector)
>   int num_modes = 0;
>  
>   edid = intel_connector->detect_edid;
> - if (edid) {
> + if (edid)
>   num_modes = intel_connector_update_modes(connector, edid);
>  
> - if (intel_vrr_is_capable(connector))
> - drm_connector_set_vrr_capable_property(connector,
> -true);
> - }
> -
>   /* Also add fixed mode, which may or may not be present in EDID */
>   if (intel_dp_is_edp(intel_attached_dp(intel_connector)) &&
>   intel_connector->panel.fixed_mode) {
> -- 
> 2.19.1
> 


[Intel-gfx] [PATCH] drm/i915/mst: re-enable link training failure fallback for DP MST

2022-03-07 Thread Jani Nikula
Commit 80a8cecf62a5 ("drm/i915/dp_mst: Disable link training fallback on
MST links") disabled link training failure fallback for DP MST due to
the MST manager using the DPCD directly, and generally being ignorant
about the possibility of downgrading link parameters. See the commit for
further details.

Since then, the max_lane_count and max_link_rate members have been added
to struct drm_dp_mst_topology_mgr in commit 98025a62cb00 ("drm/dp_mst:
Use Extended Base Receiver Capability DPCD space") and refined in
follow-up work.

The members perhaps aren't intended for changing the parameters during
the lifetime of the manager, as they're supposed to be passed to
drm_dp_mst_topology_mgr_init(). However, the members are only ever used
in drm_dp_mst_topology_mgr_set_mst(), and there seems to be nothing to
prevent us from adjusting them *before* enabling MST. They wouldn't have
an effect if modified while MST is enabled. This is not necessarily
pretty, though.

Cc: Nikola Cornij 
Cc: Lyude Paul 
Cc: Imre Deak 
Cc: Ville Syrjälä 
Cc: Uma Shankar 
Signed-off-by: Jani Nikula 

---

This is *untested*. I don't see why it wouldn't work, though... this
should allow us to downgrade the link to from 128b/132b to 8b/10b if the
former fails.

Thoughts? In particular, any objections for messing with the topology
manager members directly? Any chance it'll make refactoring the MST code
more difficult?
---
 drivers/gpu/drm/i915/display/intel_dp.c | 42 ++---
 drivers/gpu/drm/i915/display/intel_dp_mst.c |  5 ++-
 2 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
b/drivers/gpu/drm/i915/display/intel_dp.c
index 619546441eae..2fad3104b40e 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -600,15 +600,6 @@ int intel_dp_get_link_train_fallback_values(struct 
intel_dp *intel_dp,
struct drm_i915_private *i915 = dp_to_i915(intel_dp);
int index;
 
-   /*
-* TODO: Enable fallback on MST links once MST link compute can handle
-* the fallback params.
-*/
-   if (intel_dp->is_mst) {
-   drm_err(>drm, "Link Training Unsuccessful\n");
-   return -1;
-   }
-
if (intel_dp_is_edp(intel_dp) && !intel_dp->use_max_params) {
drm_dbg_kms(>drm,
"Retrying Link training for eDP with max 
parameters\n");
@@ -2785,6 +2776,8 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
struct drm_i915_private *i915 = dp_to_i915(intel_dp);
struct intel_encoder *encoder =
_to_dig_port(intel_dp)->base;
+   struct drm_dp_mst_topology_mgr *mgr = _dp->mst_mgr;
+
bool sink_can_mst = drm_dp_read_mst_cap(_dp->aux, intel_dp->dpcd);
 
drm_dbg_kms(>drm,
@@ -2800,8 +2793,17 @@ intel_dp_configure_mst(struct intel_dp *intel_dp)
intel_dp->is_mst = sink_can_mst &&
i915->params.enable_dp_mst;
 
-   drm_dp_mst_topology_mgr_set_mst(_dp->mst_mgr,
-   intel_dp->is_mst);
+   /*
+* Set the source max lane count and link rate using the possibly
+* limited values due to failed link training.
+*
+* This is a bit hackish, as the values are supposed to be passed to
+* drm_dp_mst_topology_mgr_init().
+*/
+   mgr->max_lane_count = intel_dp->max_link_lane_count;
+   mgr->max_link_rate = intel_dp->max_link_rate;
+
+   drm_dp_mst_topology_mgr_set_mst(mgr, intel_dp->is_mst);
 }
 
 static bool
@@ -4472,23 +4474,19 @@ intel_dp_detect(struct drm_connector *connector,
goto out;
}
 
-   /* Read DP Sink DSC Cap DPCD regs for DP v1.4 */
-   if (DISPLAY_VER(dev_priv) >= 11)
-   intel_dp_get_dsc_sink_cap(intel_dp);
-
-   intel_dp_configure_mst(intel_dp);
-
-   /*
-* TODO: Reset link params when switching to MST mode, until MST
-* supports link training fallback params.
-*/
-   if (intel_dp->reset_link_params || intel_dp->is_mst) {
+   if (intel_dp->reset_link_params) {
intel_dp_reset_max_link_params(intel_dp);
intel_dp->reset_link_params = false;
}
 
intel_dp_print_rates(intel_dp);
 
+   /* Read DP Sink DSC Cap DPCD regs for DP v1.4 */
+   if (DISPLAY_VER(dev_priv) >= 11)
+   intel_dp_get_dsc_sink_cap(intel_dp);
+
+   intel_dp_configure_mst(intel_dp);
+
if (intel_dp->is_mst) {
/*
 * If we are in MST mode then this connector
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c 
b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index e30e698aa684..442dbd0ed201 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -151,8 +151,9 @@ static int intel_dp_mst_compute_config(struct intel_encoder 
*encoder,

Re: [Intel-gfx] [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Matthew Auld
On Mon, 7 Mar 2022 at 18:41, Ville Syrjälä
 wrote:
>
> On Mon, Mar 07, 2022 at 06:26:32PM +, Matthew Auld wrote:
> > On 07/03/2022 17:06, Ville Syrjälä wrote:
> > > On Mon, Mar 07, 2022 at 10:32:36AM +, Matthew Auld wrote:
> > >> On 04/03/2022 19:33, Ville Syrjälä wrote:
> > >>> On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:
> >  The offset we get looks to be the exact start of DSM, but the
> >  inital_plane_vma expects the address to be relative.
> > 
> >  Signed-off-by: Matthew Auld 
> >  Cc: Thomas Hellström 
> >  ---
> > .../drm/i915/display/intel_plane_initial.c| 22 
> >  +++
> > 1 file changed, 18 insertions(+), 4 deletions(-)
> > 
> >  diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
> >  b/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >  index f797fcef18fc..b39d3a8dfe45 100644
> >  --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >  +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >  @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
> >   if (!mem || plane_config->size == 0)
> >   return NULL;
> > 
> >  -base = round_down(plane_config->base,
> >  -  I915_GTT_MIN_ALIGNMENT);
> >  -size = round_up(plane_config->base + plane_config->size,
> >  -mem->min_page_size);
> >  +base = plane_config->base;
> >  +if (IS_DGFX(i915)) {
> >  +/*
> >  + * On discrete the base address should be somewhere 
> >  in LMEM, but
> >  + * depending on the size of LMEM the base address 
> >  might
> >  + * intersect with the start of DSM, like on DG1, in 
> >  which case
> >  + * we need the relative address. In such cases we 
> >  might also
> >  + * need to choose between inital fb vs fbc, if space 
> >  is limited.
> >  + *
> >  + * On future discrete HW, like DG2, we should be able 
> >  to just
> >  + * allocate directly from LMEM, due to larger LMEM 
> >  size.
> >  + */
> >  +if (base >= i915->dsm.start)
> >  +base -= i915->dsm.start;
> > >>>
> > >>> Subsequent code expects the object to actually be inside stolen.
> > >>> If that is not the case we should just give up.
> > >>
> > >> Thanks for taking a look at this. Is that subsequent code outside
> > >> initial_plane_vma()? In the next patch this is now using LMEM directly
> > >> for dg2. Would that blow up somewhere else?
> > >
> > > It uses i915_gem_object_create_stolen_for_preallocated() which assumes
> > > the stuff is inside stolen.
> >
> > At the start of the series that gets ripped out and replaced with
> > i915_gem_object_create_region_at(), where we can now just pass in the
> > intel_memory_region, and the backend hopefully takes care of the rest.
>
> Why? Is the BIOS no longer allocating its fbs from stolen?

On discrete, so far DSM is always just snipped off the end of lmem. On
DG1, which only has 4G lmem, the base seems to always exactly match
the DSM start (not sure if this is a fluke). However on DG2, which has
much larger lmem size, the base is still the same IIRC, but it isn't
even close to where DSM is located on such a device. Best guess is
that we were meant to just treat the bios fb(or that part of stolen
lmem) as a part of normal lmem, and might explain why the base is not
relative to the dsm.start like on integrated?

>
> >
> > >
> > >>> The fact that we fail to confirm any of that on integrated
> > >>> parts has always bugged me, but not enough to actually do
> > >>> anything about it. Such a check would be somewhat more involved
> > >>> since we'd have to look at the PTEs. But on discrete sounds like
> > >>> we can get away with a trivial check.
> > >>
> > >> Which PTEs?
> > >
> > > The PTEs the plane is actually using. We have no idea where they
> > > actually point to and just assume they represent a 1:1 mapping of
> > > stolen.
> > >
> > > I suppose with lmem we'll just start assuming a 1:1 mapping of
> > > the whole lmem rather than just stolen.
> >
> > So IIUC the base that we read is actually some GGTT address(I guess it
> > comes pre-programmed or something?), and that hopefully 1:1 maps to
> > stolen. Ok, so as you say, I guess we only want to subtract the
> > dsm.start for the physical allocation, and not the GGTT address, when
> > dealing with stolen lmem.
> >
> > >
>
> --
> Ville Syrjälä
> Intel


[Intel-gfx] ✗ Fi.CI.DOCS: warning for Add CDCLK checks to atomic check phase (rev2)

2022-03-07 Thread Patchwork
== Series Details ==

Series: Add CDCLK checks to atomic check phase (rev2)
URL   : https://patchwork.freedesktop.org/series/101068/
State : warning

== Summary ==

$ make htmldocs 2>&1 > /dev/null | grep i915
./drivers/gpu/drm/i915/display/intel_cdclk.c:2035: warning: Function parameter 
or member 'i915' not described in 'intel_cdclk_needs_modeset'
./drivers/gpu/drm/i915/display/intel_cdclk.c:2103: warning: Function parameter 
or member 'i915' not described in 'intel_cdclk_changed'




[Intel-gfx] ✗ Fi.CI.SPARSE: warning for Add CDCLK checks to atomic check phase (rev2)

2022-03-07 Thread Patchwork
== Series Details ==

Series: Add CDCLK checks to atomic check phase (rev2)
URL   : https://patchwork.freedesktop.org/series/101068/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Add CDCLK checks to atomic check phase (rev2)

2022-03-07 Thread Patchwork
== Series Details ==

Series: Add CDCLK checks to atomic check phase (rev2)
URL   : https://patchwork.freedesktop.org/series/101068/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
a6b602089dfe drm/i915/display: Add CDCLK actions to intel_cdclk_state
287fb31ae0d4 drm/i915/display: s/intel_cdclk_can_squash/intel_cdclk_squash
-:28: CHECK:BRACES: Blank lines aren't necessary after an open brace '{'
#28: FILE: drivers/gpu/drm/i915/display/intel_cdclk.c:1980:
 {
+

total: 0 errors, 0 warnings, 1 checks, 40 lines checked
63062936667f drm/i915/display: s/intel_cdclk_can_crawl/intel_cdclk_crawl
606dc35fcfa9 drm/i915/display: Add drm_i915_private to 
intel_cdclk_needs_modeset()
5c4cc47d2fc7 drm/i915/display: Add cdclk checks to atomic check




[Intel-gfx] [PATCH 4/5] drm/i915/display: Add drm_i915_private to intel_cdclk_needs_modeset()

2022-03-07 Thread Anusha Srivatsa
The change is to be able to have access to the in-flight state.
Changing this one functions, trickles the change to
intel_cdclk_changed()

Signed-off-by: Anusha Srivatsa 
---
 drivers/gpu/drm/i915/display/intel_cdclk.c| 22 ++-
 drivers/gpu/drm/i915/display/intel_cdclk.h|  3 ++-
 .../drm/i915/display/intel_display_power.c|  2 +-
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 840d611197cf..2278b052d859 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2003,7 +2003,8 @@ static bool intel_cdclk_squash(struct drm_i915_private 
*dev_priv,
  * True if changing between the two CDCLK configurations
  * requires all pipes to be off, false if not.
  */
-bool intel_cdclk_needs_modeset(const struct intel_cdclk_config *a,
+bool intel_cdclk_needs_modeset(struct drm_i915_private *i915,
+  const struct intel_cdclk_config *a,
   const struct intel_cdclk_config *b)
 {
return a->cdclk != b->cdclk ||
@@ -2053,10 +2054,11 @@ static bool intel_cdclk_can_cd2x_update(struct 
drm_i915_private *dev_priv,
  * Returns:
  * True if the CDCLK configurations don't match, false if they do.
  */
-static bool intel_cdclk_changed(const struct intel_cdclk_config *a,
+static bool intel_cdclk_changed(struct drm_i915_private *i915,
+   const struct intel_cdclk_config *a,
const struct intel_cdclk_config *b)
 {
-   return intel_cdclk_needs_modeset(a, b) ||
+   return intel_cdclk_needs_modeset(i915, a, b) ||
a->voltage_level != b->voltage_level;
 }
 
@@ -2085,7 +2087,7 @@ static void intel_set_cdclk(struct drm_i915_private 
*dev_priv,
 {
struct intel_encoder *encoder;
 
-   if (!intel_cdclk_changed(_priv->cdclk.hw, cdclk_config))
+   if (!intel_cdclk_changed(dev_priv, _priv->cdclk.hw, cdclk_config))
return;
 
if (drm_WARN_ON_ONCE(_priv->drm, !dev_priv->cdclk_funcs->set_cdclk))
@@ -2132,7 +2134,7 @@ static void intel_set_cdclk(struct drm_i915_private 
*dev_priv,
intel_audio_cdclk_change_post(dev_priv);
 
if (drm_WARN(_priv->drm,
-intel_cdclk_changed(_priv->cdclk.hw, cdclk_config),
+intel_cdclk_changed(dev_priv, _priv->cdclk.hw, 
cdclk_config),
 "cdclk state doesn't match!\n")) {
intel_cdclk_dump_config(dev_priv, _priv->cdclk.hw, "[hw 
state]");
intel_cdclk_dump_config(dev_priv, cdclk_config, "[sw state]");
@@ -2156,7 +2158,7 @@ intel_set_cdclk_pre_plane_update(struct 
intel_atomic_state *state)
intel_atomic_get_new_cdclk_state(state);
enum pipe pipe = new_cdclk_state->pipe;
 
-   if (!intel_cdclk_changed(_cdclk_state->actual,
+   if (!intel_cdclk_changed(dev_priv, _cdclk_state->actual,
 _cdclk_state->actual))
return;
 
@@ -2185,7 +2187,7 @@ intel_set_cdclk_post_plane_update(struct 
intel_atomic_state *state)
intel_atomic_get_new_cdclk_state(state);
enum pipe pipe = new_cdclk_state->pipe;
 
-   if (!intel_cdclk_changed(_cdclk_state->actual,
+   if (!intel_cdclk_changed(dev_priv, _cdclk_state->actual,
 _cdclk_state->actual))
return;
 
@@ -2739,7 +2741,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state 
*state)
if (ret)
return ret;
 
-   if (intel_cdclk_changed(_cdclk_state->actual,
+   if (intel_cdclk_changed(dev_priv, _cdclk_state->actual,
_cdclk_state->actual)) {
/*
 * Also serialize commits across all crtcs
@@ -2750,7 +2752,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state 
*state)
return ret;
} else if (old_cdclk_state->active_pipes != 
new_cdclk_state->active_pipes ||
   old_cdclk_state->force_min_cdclk != 
new_cdclk_state->force_min_cdclk ||
-  intel_cdclk_changed(_cdclk_state->logical,
+  intel_cdclk_changed(dev_priv, _cdclk_state->logical,
   _cdclk_state->logical)) {
ret = intel_atomic_lock_global_state(_cdclk_state->base);
if (ret)
@@ -2793,7 +2795,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state 
*state)
drm_dbg_kms(_priv->drm,
"Can change cdclk cd2x divider with pipe %c 
active\n",
pipe_name(pipe));
-   } else if (intel_cdclk_needs_modeset(_cdclk_state->actual,
+   } else if (intel_cdclk_needs_modeset(dev_priv, _cdclk_state->actual,
 _cdclk_state->actual)) {
/* All pipes must be switched 

[Intel-gfx] [PATCH 1/5] drm/i915/display: Add CDCLK actions to intel_cdclk_state

2022-03-07 Thread Anusha Srivatsa
This is a prep patch for what the rest of the series does.

Add existing actions that change cdclk - squash, crawl, modeset to
intel_cdclk_state so we have access to the cdclk values
that are in transition.

Cc: Jani Nikula 
Signed-off-by: Anusha Srivatsa 
---
 drivers/gpu/drm/i915/display/intel_cdclk.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h 
b/drivers/gpu/drm/i915/display/intel_cdclk.h
index df66f66fbad0..06d7f9f0b253 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.h
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.h
@@ -15,6 +15,14 @@ struct drm_i915_private;
 struct intel_atomic_state;
 struct intel_crtc_state;
 
+enum cdclk_actions {
+   INTEL_CDCLK_MODESET = 0,
+   INTEL_CDCLK_SQUASH,
+   INTEL_CDCLK_CRAWL,
+   INTEL_CDCLK_NOOP,
+   MAX_CDCLK_ACTIONS
+};
+
 struct intel_cdclk_config {
unsigned int cdclk, vco, ref, bypass;
u8 voltage_level;
@@ -49,6 +57,11 @@ struct intel_cdclk_state {
 
/* bitmask of active pipes */
u8 active_pipes;
+
+   struct cdclk_step {
+   enum cdclk_actions action;
+   u32 cdclk;
+   } steps[MAX_CDCLK_ACTIONS];
 };
 
 int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
-- 
2.25.1



[Intel-gfx] [PATCH 3/5] drm/i915/display: s/intel_cdclk_can_crawl/intel_cdclk_crawl

2022-03-07 Thread Anusha Srivatsa
Apart from checking if crawling can be performed,
accommodate accessing in-flight cdclk state for any changes
that are needed during commit phase.

Cc: Jani Nikula 
Signed-off-by: Anusha Srivatsa 
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 1f879af15d87..840d611197cf 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1952,8 +1952,8 @@ void intel_cdclk_uninit_hw(struct drm_i915_private *i915)
 }
 
 static bool intel_cdclk_can_crawl(struct drm_i915_private *dev_priv,
- const struct intel_cdclk_config *a,
- const struct intel_cdclk_config *b)
+ const struct intel_cdclk_state *a,
+ struct intel_cdclk_state *b)
 {
int a_div, b_div;
 
@@ -1964,13 +1964,13 @@ static bool intel_cdclk_can_crawl(struct 
drm_i915_private *dev_priv,
 * The vco and cd2x divider will change independently
 * from each, so we disallow cd2x change when crawling.
 */
-   a_div = DIV_ROUND_CLOSEST(a->vco, a->cdclk);
-   b_div = DIV_ROUND_CLOSEST(b->vco, b->cdclk);
+   a_div = DIV_ROUND_CLOSEST(a->actual.vco, a->actual.cdclk);
+   b_div = DIV_ROUND_CLOSEST(b->actual.vco, b->actual.cdclk);
 
-   return a->vco != 0 && b->vco != 0 &&
-   a->vco != b->vco &&
+   return a->actual.vco != 0 && b->actual.vco != 0 &&
+   a->actual.vco != b->actual.vco &&
a_div == b_div &&
-   a->ref == b->ref;
+   a->actual.ref == b->actual.ref;
 }
 
 static bool intel_cdclk_squash(struct drm_i915_private *dev_priv,
@@ -2783,8 +2783,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state 
*state)
drm_dbg_kms(_priv->drm,
"Can change cdclk via squasher\n");
} else if (intel_cdclk_can_crawl(dev_priv,
-_cdclk_state->actual,
-_cdclk_state->actual)) {
+old_cdclk_state,
+new_cdclk_state)) {
drm_dbg_kms(_priv->drm,
"Can change cdclk via crawl\n");
} else if (pipe != INVALID_PIPE) {
-- 
2.25.1



[Intel-gfx] [PATCH 5/5] drm/i915/display: Add cdclk checks to atomic check

2022-03-07 Thread Anusha Srivatsa
Checking cdclk conditions during atomic check and preparing
for commit phase so we can have atomic commit as simple
as possible. Add the specific steps to be taken during
cdclk changes, prepare for squashing, crawling and modeset
scenarios.

Cc: Jani Nikula 
Signed-off-by: Anusha Srivatsa 
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 111 ++---
 1 file changed, 77 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 2278b052d859..356631f5a16e 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1700,12 +1700,23 @@ static void bxt_set_cdclk(struct drm_i915_private 
*dev_priv,
  const struct intel_cdclk_config *cdclk_config,
  enum pipe pipe)
 {
+   struct intel_atomic_state *state;
+   struct intel_cdclk_state *new_cdclk_state;
+   struct cdclk_step *cdclk_steps;
+   struct intel_cdclk_state *cdclk_state;
int cdclk = cdclk_config->cdclk;
int vco = cdclk_config->vco;
+   u32 squash_ctl = 0;
u32 val;
u16 waveform;
int clock;
int ret;
+   int i;
+
+   cdclk_state =  to_intel_cdclk_state(dev_priv->cdclk.obj.state);
+   state = cdclk_state->base.state;
+   new_cdclk_state = intel_atomic_get_new_cdclk_state(state);
+   cdclk_steps = new_cdclk_state->steps;
 
/* Inform power controller of upcoming frequency change. */
if (DISPLAY_VER(dev_priv) >= 11)
@@ -1728,45 +1739,48 @@ static void bxt_set_cdclk(struct drm_i915_private 
*dev_priv,
return;
}
 
-   if (HAS_CDCLK_CRAWL(dev_priv) && dev_priv->cdclk.hw.vco > 0 && vco > 0) 
{
-   if (dev_priv->cdclk.hw.vco != vco)
+   for (i = 0; i < MAX_CDCLK_ACTIONS; i++) {
+   switch (cdclk_steps[i].action) {
+   case INTEL_CDCLK_MODESET:
+   if (DISPLAY_VER(dev_priv) >= 11) {
+   if (dev_priv->cdclk.hw.vco != 0 &&
+   dev_priv->cdclk.hw.vco != vco)
+   icl_cdclk_pll_disable(dev_priv);
+
+   if (dev_priv->cdclk.hw.vco != vco)
+   icl_cdclk_pll_enable(dev_priv, vco);
+   } else {
+   if (dev_priv->cdclk.hw.vco != 0 &&
+   dev_priv->cdclk.hw.vco != vco)
+   bxt_de_pll_disable(dev_priv);
+
+   if (dev_priv->cdclk.hw.vco != vco)
+   bxt_de_pll_enable(dev_priv, vco);
+   }
+   clock = cdclk;
+   break;
+   case INTEL_CDCLK_CRAWL:
adlp_cdclk_pll_crawl(dev_priv, vco);
-   } else if (DISPLAY_VER(dev_priv) >= 11) {
-   if (dev_priv->cdclk.hw.vco != 0 &&
-   dev_priv->cdclk.hw.vco != vco)
-   icl_cdclk_pll_disable(dev_priv);
-
-   if (dev_priv->cdclk.hw.vco != vco)
-   icl_cdclk_pll_enable(dev_priv, vco);
-   } else {
-   if (dev_priv->cdclk.hw.vco != 0 &&
-   dev_priv->cdclk.hw.vco != vco)
-   bxt_de_pll_disable(dev_priv);
-
-   if (dev_priv->cdclk.hw.vco != vco)
-   bxt_de_pll_enable(dev_priv, vco);
-   }
-
-   waveform = cdclk_squash_waveform(dev_priv, cdclk);
-
-   if (waveform)
-   clock = vco / 2;
-   else
-   clock = cdclk;
-
-   if (has_cdclk_squasher(dev_priv)) {
-   u32 squash_ctl = 0;
-
-   if (waveform)
+   clock = cdclk;
+   break;
+   case INTEL_CDCLK_SQUASH:
+   waveform =  cdclk_squash_waveform(dev_priv, 
cdclk_steps[i].cdclk);
+   clock = vco / 2;
squash_ctl = CDCLK_SQUASH_ENABLE |
-   CDCLK_SQUASH_WINDOW_SIZE(0xf) | waveform;
-
-   intel_de_write(dev_priv, CDCLK_SQUASH_CTL, squash_ctl);
+   CDCLK_SQUASH_WINDOW_SIZE(0xf) | waveform;
+   intel_de_write(dev_priv, CDCLK_SQUASH_CTL, squash_ctl);
+   break;
+   case INTEL_CDCLK_NOOP:
+   break;
+   default:
+   MISSING_CASE(cdclk_steps[i].action);
+   break;
+   }
}
 
val = bxt_cdclk_cd2x_div_sel(dev_priv, clock, vco) |
-   bxt_cdclk_cd2x_pipe(dev_priv, pipe) |
-   skl_cdclk_decimal(cdclk);
+ bxt_cdclk_cd2x_pipe(dev_priv, pipe) |
+ skl_cdclk_decimal(cdclk);
 
/*
 * Disable SSA Precharge 

[Intel-gfx] [PATCH 2/5] drm/i915/display: s/intel_cdclk_can_squash/intel_cdclk_squash

2022-03-07 Thread Anusha Srivatsa
Apart from checking whether squashing can be performed,
also accommodate accessing the in-flight cdclk state for any
changes that are needed during the commit phase.

Cc: Jani Nikula 
Signed-off-by: Anusha Srivatsa 
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c
index fda8b701..1f879af15d87 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1973,10 +1973,11 @@ static bool intel_cdclk_can_crawl(struct 
drm_i915_private *dev_priv,
a->ref == b->ref;
 }
 
-static bool intel_cdclk_can_squash(struct drm_i915_private *dev_priv,
-  const struct intel_cdclk_config *a,
-  const struct intel_cdclk_config *b)
+static bool intel_cdclk_squash(struct drm_i915_private *dev_priv,
+  const struct intel_cdclk_state *a,
+  struct intel_cdclk_state *b)
 {
+
/*
 * FIXME should store a bit more state in intel_cdclk_config
 * to differentiate squasher vs. cd2x divider properly. For
@@ -1986,10 +1987,10 @@ static bool intel_cdclk_can_squash(struct 
drm_i915_private *dev_priv,
if (!has_cdclk_squasher(dev_priv))
return false;
 
-   return a->cdclk != b->cdclk &&
-   a->vco != 0 &&
-   a->vco == b->vco &&
-   a->ref == b->ref;
+   return a->actual.cdclk != b->actual.cdclk &&
+   a->actual.vco != 0 &&
+   a->actual.vco == b->actual.vco &&
+   a->actual.ref == b->actual.ref;
 }
 
 /**
@@ -2776,9 +2777,9 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state 
*state)
pipe = INVALID_PIPE;
}
 
-   if (intel_cdclk_can_squash(dev_priv,
-  _cdclk_state->actual,
-  _cdclk_state->actual)) {
+   if (intel_cdclk_squash(dev_priv,
+  old_cdclk_state,
+  new_cdclk_state)) {
drm_dbg_kms(_priv->drm,
"Can change cdclk via squasher\n");
} else if (intel_cdclk_can_crawl(dev_priv,
-- 
2.25.1



[Intel-gfx] [PATCH 0/5] Add CDCLK checks to atomic check phase

2022-03-07 Thread Anusha Srivatsa
This version splits the original patch into simpler units.

The intention is to check for the squashing and crawling conditions
at the atomic check phase and prepare for the commit phase. This basically
means the in-flight cdclk state is available. intel_cdclk_can_squash(),
intel_cdclk_can_crawl() and intel_cdclk_needs_modeset() have changes
to accommodate this.

Cc: Stanislav Lisovskiy 

Anusha Srivatsa (5):
  drm/i915/display: Add CDCLK actions to intel_cdclk_state
  drm/i915/display: s/intel_cdclk_can_squash/intel_cdclk_squash
  drm/i915/display: s/intel_cdclk_can_crawl/intel_cdclk_crawl
  drm/i915/display: Add drm_i915_private to intel_cdclk_needs_modeset()
  drm/i915/display: Add cdclk checks to atomic check

 drivers/gpu/drm/i915/display/intel_cdclk.c| 172 +++---
 drivers/gpu/drm/i915/display/intel_cdclk.h|  16 +-
 .../drm/i915/display/intel_display_power.c|   2 +-
 3 files changed, 125 insertions(+), 65 deletions(-)

-- 
2.25.1



Re: [Intel-gfx] [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Ville Syrjälä
On Mon, Mar 07, 2022 at 06:26:32PM +, Matthew Auld wrote:
> On 07/03/2022 17:06, Ville Syrjälä wrote:
> > On Mon, Mar 07, 2022 at 10:32:36AM +, Matthew Auld wrote:
> >> On 04/03/2022 19:33, Ville Syrjälä wrote:
> >>> On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:
>  The offset we get looks to be the exact start of DSM, but the
>  inital_plane_vma expects the address to be relative.
> 
>  Signed-off-by: Matthew Auld 
>  Cc: Thomas Hellström 
>  ---
> .../drm/i915/display/intel_plane_initial.c| 22 +++
> 1 file changed, 18 insertions(+), 4 deletions(-)
> 
>  diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
>  b/drivers/gpu/drm/i915/display/intel_plane_initial.c
>  index f797fcef18fc..b39d3a8dfe45 100644
>  --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
>  +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
>  @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
>   if (!mem || plane_config->size == 0)
>   return NULL;
> 
>  -base = round_down(plane_config->base,
>  -  I915_GTT_MIN_ALIGNMENT);
>  -size = round_up(plane_config->base + plane_config->size,
>  -mem->min_page_size);
>  +base = plane_config->base;
>  +if (IS_DGFX(i915)) {
>  +/*
>  + * On discrete the base address should be somewhere in 
>  LMEM, but
>  + * depending on the size of LMEM the base address might
>  + * intersect with the start of DSM, like on DG1, in 
>  which case
>  + * we need the relative address. In such cases we might 
>  also
>  + * need to choose between inital fb vs fbc, if space is 
>  limited.
>  + *
>  + * On future discrete HW, like DG2, we should be able 
>  to just
>  + * allocate directly from LMEM, due to larger LMEM size.
>  + */
>  +if (base >= i915->dsm.start)
>  +base -= i915->dsm.start;
> >>>
> >>> Subsequent code expects the object to actually be inside stolen.
> >>> If that is not the case we should just give up.
> >>
> >> Thanks for taking a look at this. Is that subsequent code outside
> >> initial_plane_vma()? In the next patch this is now using LMEM directly
> >> for dg2. Would that blow up somewhere else?
> > 
> > It uses i915_gem_object_create_stolen_for_preallocated() which assumes
> > the stuff is inside stolen.
> 
> At the start of the series that gets ripped out and replaced with 
> i915_gem_object_create_region_at(), where we can now just pass in the 
> intel_memory_region, and the backend hopefully takes care of the rest.

Why? Is the BIOS no longer allocating its fbs from stolen?

> 
> > 
> >>> The fact that we fail to confirm any of that on integrated
> >>> parts has always bugged me, but not enough to actually do
> >>> anything about it. Such a check would be somewhat more involved
> >>> since we'd have to look at the PTEs. But on discrete sounds like
> >>> we can get away with a trivial check.
> >>
> >> Which PTEs?
> > 
> > The PTEs the plane is actually using. We have no idea where they
> > actually point to and just assume they represent a 1:1 mapping of
> > stolen.
> > 
> > I suppose with lmem we'll just start assuming a 1:1 mapping of
> > the whole lmem rather than just stolen.
> 
> So IIUC the base that we read is actually some GGTT address(I guess it 
> comes pre-programmed or something?), and that hopefully 1:1 maps to 
> stolen. Ok, so as you say, I guess we only want to subtract the 
> dsm.start for the physical allocation, and not the GGTT address, when 
> dealing with stolen lmem.
> 
> > 

-- 
Ville Syrjälä
Intel


[Intel-gfx] ✓ Fi.CI.IGT: success for drm: remove min_order BUG_ON check

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm: remove min_order BUG_ON check
URL   : https://patchwork.freedesktop.org/series/101108/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11332_full -> Patchwork_22502_full


Summary
---

  **SUCCESS**

  No regressions found.

  

Participating hosts (13 -> 13)
--

  No changes in participating hosts

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_22502_full:

### IGT changes ###

 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_exec_schedule@deep@vcs0:
- {shard-rkl}:NOTRUN -> [INCOMPLETE][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-rkl-5/igt@gem_exec_schedule@d...@vcs0.html

  * igt@i915_selftest@live@hangcheck:
- {shard-rkl}:[PASS][2] -> [INCOMPLETE][3]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-rkl-6/igt@i915_selftest@l...@hangcheck.html
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-rkl-5/igt@i915_selftest@l...@hangcheck.html

  * 
{igt@kms_plane_scaling@scaler-with-pixel-format-unity-scaling@pipe-b-edp-1-scaler-with-pixel-format}:
- shard-iclb: [PASS][4] -> [INCOMPLETE][5]
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-iclb7/igt@kms_plane_scaling@scaler-with-pixel-format-unity-scal...@pipe-b-edp-1-scaler-with-pixel-format.html
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-iclb2/igt@kms_plane_scaling@scaler-with-pixel-format-unity-scal...@pipe-b-edp-1-scaler-with-pixel-format.html

  
Known issues


  Here are the changes found in Patchwork_22502_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@feature_discovery@display-3x:
- shard-glk:  NOTRUN -> [SKIP][6] ([fdo#109271]) +40 similar issues
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-glk8/igt@feature_discov...@display-3x.html

  * igt@gem_eio@in-flight-1us:
- shard-tglb: [PASS][7] -> [TIMEOUT][8] ([i915#3063])
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-tglb6/igt@gem_...@in-flight-1us.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-tglb3/igt@gem_...@in-flight-1us.html

  * igt@gem_exec_capture@pi@rcs0:
- shard-iclb: [PASS][9] -> [INCOMPLETE][10] ([i915#3371])
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-iclb6/igt@gem_exec_capture@p...@rcs0.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-iclb5/igt@gem_exec_capture@p...@rcs0.html

  * igt@gem_exec_fair@basic-deadline:
- shard-skl:  NOTRUN -> [FAIL][11] ([i915#2846])
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-skl9/igt@gem_exec_f...@basic-deadline.html

  * igt@gem_exec_fair@basic-none-share@rcs0:
- shard-iclb: [PASS][12] -> [FAIL][13] ([i915#2842])
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-iclb1/igt@gem_exec_fair@basic-none-sh...@rcs0.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-iclb3/igt@gem_exec_fair@basic-none-sh...@rcs0.html

  * igt@gem_exec_fair@basic-none-solo@rcs0:
- shard-kbl:  NOTRUN -> [FAIL][14] ([i915#2842])
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-kbl7/igt@gem_exec_fair@basic-none-s...@rcs0.html

  * igt@gem_exec_fair@basic-pace@vecs0:
- shard-kbl:  [PASS][15] -> [FAIL][16] ([i915#2842])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-kbl1/igt@gem_exec_fair@basic-p...@vecs0.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-kbl4/igt@gem_exec_fair@basic-p...@vecs0.html

  * igt@gem_huc_copy@huc-copy:
- shard-tglb: [PASS][17] -> [SKIP][18] ([i915#2190])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-tglb2/igt@gem_huc_c...@huc-copy.html
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-tglb7/igt@gem_huc_c...@huc-copy.html
- shard-skl:  NOTRUN -> [SKIP][19] ([fdo#109271] / [i915#2190])
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-skl6/igt@gem_huc_c...@huc-copy.html

  * igt@gem_lmem_swapping@heavy-multi:
- shard-glk:  NOTRUN -> [SKIP][20] ([fdo#109271] / [i915#4613])
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-glk8/igt@gem_lmem_swapp...@heavy-multi.html

  * igt@gem_lmem_swapping@heavy-verify-multi:
- shard-kbl:  NOTRUN -> [SKIP][21] ([fdo#109271] / [i915#4613])
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/shard-kbl7/igt@gem_lmem_swapp...@heavy-verify-multi.html

  * igt@gem_lmem_swapping@verify:
- shard-apl:  NOTRUN 

Re: [Intel-gfx] [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Matthew Auld

On 07/03/2022 17:06, Ville Syrjälä wrote:

On Mon, Mar 07, 2022 at 10:32:36AM +, Matthew Auld wrote:

On 04/03/2022 19:33, Ville Syrjälä wrote:

On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:

The offset we get looks to be the exact start of DSM, but the
inital_plane_vma expects the address to be relative.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
   .../drm/i915/display/intel_plane_initial.c| 22 +++
   1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
b/drivers/gpu/drm/i915/display/intel_plane_initial.c
index f797fcef18fc..b39d3a8dfe45 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
if (!mem || plane_config->size == 0)
return NULL;
   
-	base = round_down(plane_config->base,

- I915_GTT_MIN_ALIGNMENT);
-   size = round_up(plane_config->base + plane_config->size,
-   mem->min_page_size);
+   base = plane_config->base;
+   if (IS_DGFX(i915)) {
+   /*
+* On discrete the base address should be somewhere in LMEM, but
+* depending on the size of LMEM the base address might
+* intersect with the start of DSM, like on DG1, in which case
+* we need the relative address. In such cases we might also
+* need to choose between inital fb vs fbc, if space is limited.
+*
+* On future discrete HW, like DG2, we should be able to just
+* allocate directly from LMEM, due to larger LMEM size.
+*/
+   if (base >= i915->dsm.start)
+   base -= i915->dsm.start;


Subsequent code expects the object to actually be inside stolen.
If that is not the case we should just give up.


Thanks for taking a look at this. Is that subsequent code outside
initial_plane_vma()? In the next patch this is now using LMEM directly
for dg2. Would that blow up somewhere else?


It uses i915_gem_object_create_stolen_for_preallocated() which assumes
the stuff is inside stolen.


At the start of the series that gets ripped out and replaced with 
i915_gem_object_create_region_at(), where we can now just pass in the 
intel_memory_region, and the backend hopefully takes care of the rest.





The fact that we fail to confirm any of that on integrated
parts has always bugged me, but not enough to actually do
anything about it. Such a check would be somewhat more involved
since we'd have to look at the PTEs. But on discrete sounds like
we can get away with a trivial check.


Which PTEs?


The PTEs the plane is actually using. We have no idea where they
actually point to and just assume they represent a 1:1 mapping of
stolen.

I suppose with lmem we'll just start assuming a 1:1 mapping of
the whole lmem rather than just stolen.


So IIUC the base that we read is actually some GGTT address(I guess it 
comes pre-programmed or something?), and that hopefully 1:1 maps to 
stolen. Ok, so as you say, I guess we only want to subtract the 
dsm.start for the physical allocation, and not the GGTT address, when 
dealing with stolen lmem.






Re: [Intel-gfx] [PATCH 07/11] drm/i915: Store the m2 divider as a whole in bxt_clk_div

2022-03-07 Thread Ville Syrjälä
On Fri, Mar 04, 2022 at 01:36:27PM +0200, Jani Nikula wrote:
> On Tue, 01 Mar 2022, Ville Syrjala  wrote:
> > From: Ville Syrjälä 
> >
> > Get rid of the pointless m2 int vs. frac split in bxt_clk_div
> > and just store the whole divider as one.
> >
> > Signed-off-by: Ville Syrjälä 
> > ---
> >  drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 33 +++
> >  1 file changed, 19 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c 
> > b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
> > index 899aa42a858f..4a82e630cbec 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
> > @@ -2085,8 +2085,7 @@ struct bxt_clk_div {
> > int clock;
> > u32 p1;
> > u32 p2;
> > -   u32 m2_int;
> > -   u32 m2_frac;
> > +   u32 m2;
> > u32 n;
> >  
> > int vco;
> > @@ -2094,13 +2093,20 @@ struct bxt_clk_div {
> >  
> >  /* pre-calculated values for DP linkrates */
> >  static const struct bxt_clk_div bxt_dp_clk_val[] = {
> > -   { .clock = 162000, .p1 = 4, .p2 = 2, .m2_int = 32, .m2_frac = 1677722, 
> > .n = 1, },
> > -   { .clock = 27, .p1 = 4, .p2 = 1, .m2_int = 27, .m2_frac =   0, 
> > .n = 1, },
> > -   { .clock = 54, .p1 = 2, .p2 = 1, .m2_int = 27, .m2_frac =   0, 
> > .n = 1, },
> > -   { .clock = 216000, .p1 = 3, .p2 = 2, .m2_int = 32, .m2_frac = 1677722, 
> > .n = 1, },
> > -   { .clock = 243000, .p1 = 4, .p2 = 1, .m2_int = 24, .m2_frac = 1258291, 
> > .n = 1, },
> > -   { .clock = 324000, .p1 = 4, .p2 = 1, .m2_int = 32, .m2_frac = 1677722, 
> > .n = 1, },
> > -   { .clock = 432000, .p1 = 3, .p2 = 1, .m2_int = 32, .m2_frac = 1677722, 
> > .n = 1, },
> > +   { .clock = 162000, .p1 = 4, .p2 = 2, .n = 1,
> > + .m2 = 0x81a /* .m2_int = 32, m2_frac = 1677722 */ },
> > +   { .clock = 27, .p1 = 4, .p2 = 1, .n = 1,
> > + .m2 = 0x6c0 /* .m2_int = 27, m2_frac =   0 */ },
> > +   { .clock = 54, .p1 = 2, .p2 = 1, .n = 1,
> > + .m2 = 0x6c0 /* .m2_int = 27, m2_frac =   0 */ },
> > +   { .clock = 216000, .p1 = 3, .p2 = 2, .n = 1,
> > + .m2 = 0x81a /* .m2_int = 32, m2_frac = 1677722 */ },
> > +   { .clock = 243000, .p1 = 4, .p2 = 1, .n = 1,
> > + .m2 = 0x613 /* .m2_int = 24, m2_frac = 1258291 */ },
> > +   { .clock = 324000, .p1 = 4, .p2 = 1, .n = 1,
> > + .m2 = 0x81a /* .m2_int = 32, m2_frac = 1677722 */ },
> > +   { .clock = 432000, .p1 = 3, .p2 = 1, .n = 1,
> > + .m2 = 0x81a /* .m2_int = 32, m2_frac = 1677722 */ },
> 
> Mmh, I guess here I would've added some macros to construct m2 from
> m2_int and m2_frac.
> 
> #define M2_INT_SHIFT  22
> #define M2_FRAC_MASK  0x3f
> 
> #define M2(int, frac) ((int) << M2_INT_SHIFT) | (frac))

I don't think this weird decimal representation of m2 is useful
for anything actually. I just copy-pasted it from the chv side
for consistency. Should just probably nuke it for both.

I guess the sensible thing would be to just write the full m2 in
decimal in the comment, eg. ".m2 = 0x81a /* 32.4 */"

Hmm. Or we could even go a bit further and just do:
.m2 = 32.4 * (1 << 22) + .5
and hope the compiler evaluates it at compile time instead
of getting upset about the floats.

> 
> And you get this:
> 
>   { .clock = 432000, .p1 = 3, .p2 = 1, .m2 = M2(32, 1677722), .n = 1, },
> 
> No need to retain the int/frac in comments. Can also use
> REG_FIELD_PREP/GET if you want to over-engineer...
> 
> >  };
> >  
> >  static bool
> > @@ -2127,8 +2133,7 @@ bxt_ddi_hdmi_pll_dividers(struct intel_crtc_state 
> > *crtc_state,
> > clk_div->p2 = best_clock.p2;
> > drm_WARN_ON(>drm, best_clock.m1 != 2);
> > clk_div->n = best_clock.n;
> > -   clk_div->m2_int = best_clock.m2 >> 22;
> > -   clk_div->m2_frac = best_clock.m2 & ((1 << 22) - 1);
> > +   clk_div->m2 = best_clock.m2;
> >  
> > clk_div->vco = best_clock.vco;
> >  
> > @@ -2197,11 +2202,11 @@ static bool bxt_ddi_set_dpll_hw_state(struct 
> > intel_crtc_state *crtc_state,
> > lanestagger = 0x02;
> >  
> > dpll_hw_state->ebb0 = PORT_PLL_P1(clk_div->p1) | 
> > PORT_PLL_P2(clk_div->p2);
> > -   dpll_hw_state->pll0 = clk_div->m2_int;
> > +   dpll_hw_state->pll0 = clk_div->m2 >> 22;
> > dpll_hw_state->pll1 = PORT_PLL_N(clk_div->n);
> > -   dpll_hw_state->pll2 = clk_div->m2_frac;
> > +   dpll_hw_state->pll2 = clk_div->m2 & 0x3f;

I should probably use the REG_FIELD_PREP() macros consistently here.

> >  
> > -   if (clk_div->m2_frac)
> > +   if (clk_div->m2 & 0x3f)
> > dpll_hw_state->pll3 = PORT_PLL_M2_FRAC_ENABLE;

But here such usage would imply tht the reg value == m2
fractional part. That does happen to be the case here but 
not sure I want to write the code in a way that assumes that.

> 
> Also could reuse the shift and mask macros here.
> 
> Other than that, the direction seems good.
> 
> BR,
> Jani.
> 
> 
> >  
> > dpll_hw_state->pll6 = prop_coef | 

[Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915: opportunistically apply ALLOC_CONTIGIOUS (rev2)

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/i915: opportunistically apply ALLOC_CONTIGIOUS (rev2)
URL   : https://patchwork.freedesktop.org/series/99631/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11332_full -> Patchwork_22498_full


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_22498_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_22498_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Participating hosts (13 -> 13)
--

  No changes in participating hosts

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_22498_full:

### IGT changes ###

 Possible regressions 

  * igt@kms_plane@plane-position-hole@pipe-b-planes:
- shard-tglb: [PASS][1] -> [INCOMPLETE][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-tglb3/igt@kms_plane@plane-position-h...@pipe-b-planes.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-tglb8/igt@kms_plane@plane-position-h...@pipe-b-planes.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@gem_exec_parallel@contexts@vecs0:
- {shard-rkl}:[PASS][3] -> ([PASS][4], [DMESG-WARN][5]) +1 similar 
issue
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-rkl-2/igt@gem_exec_parallel@conte...@vecs0.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-rkl-4/igt@gem_exec_parallel@conte...@vecs0.html
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-rkl-5/igt@gem_exec_parallel@conte...@vecs0.html

  
Known issues


  Here are the changes found in Patchwork_22498_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@feature_discovery@display-3x:
- shard-glk:  NOTRUN -> [SKIP][6] ([fdo#109271]) +40 similar issues
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-glk6/igt@feature_discov...@display-3x.html

  * igt@gem_eio@unwedge-stress:
- shard-iclb: [PASS][7] -> [TIMEOUT][8] ([i915#2481] / [i915#3070])
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-iclb7/igt@gem_...@unwedge-stress.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-iclb7/igt@gem_...@unwedge-stress.html

  * igt@gem_exec_balancer@parallel-balancer:
- shard-iclb: [PASS][9] -> [SKIP][10] ([i915#4525])
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-iclb1/igt@gem_exec_balan...@parallel-balancer.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-iclb3/igt@gem_exec_balan...@parallel-balancer.html

  * igt@gem_exec_capture@pi@rcs0:
- shard-iclb: [PASS][11] -> [INCOMPLETE][12] ([i915#3371])
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-iclb6/igt@gem_exec_capture@p...@rcs0.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-iclb5/igt@gem_exec_capture@p...@rcs0.html

  * igt@gem_exec_fair@basic-deadline:
- shard-skl:  NOTRUN -> [FAIL][13] ([i915#2846])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-skl1/igt@gem_exec_f...@basic-deadline.html

  * igt@gem_exec_fair@basic-none-share@rcs0:
- shard-iclb: [PASS][14] -> [FAIL][15] ([i915#2842]) +1 similar 
issue
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-iclb1/igt@gem_exec_fair@basic-none-sh...@rcs0.html
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-iclb4/igt@gem_exec_fair@basic-none-sh...@rcs0.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
- shard-tglb: [PASS][16] -> [FAIL][17] ([i915#2842])
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-tglb2/igt@gem_exec_fair@basic-pace-sh...@rcs0.html
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-tglb3/igt@gem_exec_fair@basic-pace-sh...@rcs0.html

  * igt@gem_huc_copy@huc-copy:
- shard-skl:  NOTRUN -> [SKIP][18] ([fdo#109271] / [i915#2190])
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-skl3/igt@gem_huc_c...@huc-copy.html

  * igt@gem_lmem_swapping@heavy-multi:
- shard-glk:  NOTRUN -> [SKIP][19] ([fdo#109271] / [i915#4613])
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-glk6/igt@gem_lmem_swapp...@heavy-multi.html

  * igt@gem_lmem_swapping@verify:
- shard-apl:  NOTRUN -> [SKIP][20] ([fdo#109271] / [i915#4613]) +1 
similar issue
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/shard-apl3/igt@gem_lmem_swapp...@verify.html

  * 

Re: [Intel-gfx] [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Ville Syrjälä
On Mon, Mar 07, 2022 at 10:32:36AM +, Matthew Auld wrote:
> On 04/03/2022 19:33, Ville Syrjälä wrote:
> > On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:
> >> The offset we get looks to be the exact start of DSM, but the
> >> inital_plane_vma expects the address to be relative.
> >>
> >> Signed-off-by: Matthew Auld 
> >> Cc: Thomas Hellström 
> >> ---
> >>   .../drm/i915/display/intel_plane_initial.c| 22 +++
> >>   1 file changed, 18 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
> >> b/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >> index f797fcef18fc..b39d3a8dfe45 100644
> >> --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >> +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
> >> @@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
> >>if (!mem || plane_config->size == 0)
> >>return NULL;
> >>   
> >> -  base = round_down(plane_config->base,
> >> -I915_GTT_MIN_ALIGNMENT);
> >> -  size = round_up(plane_config->base + plane_config->size,
> >> -  mem->min_page_size);
> >> +  base = plane_config->base;
> >> +  if (IS_DGFX(i915)) {
> >> +  /*
> >> +   * On discrete the base address should be somewhere in LMEM, but
> >> +   * depending on the size of LMEM the base address might
> >> +   * intersect with the start of DSM, like on DG1, in which case
> >> +   * we need the relative address. In such cases we might also
> >> +   * need to choose between inital fb vs fbc, if space is limited.
> >> +   *
> >> +   * On future discrete HW, like DG2, we should be able to just
> >> +   * allocate directly from LMEM, due to larger LMEM size.
> >> +   */
> >> +  if (base >= i915->dsm.start)
> >> +  base -= i915->dsm.start;
> > 
> > Subsequent code expects the object to actually be inside stolen.
> > If that is not the case we should just give up.
> 
> Thanks for taking a look at this. Is that subsequent code outside 
> initial_plane_vma()? In the next patch this is now using LMEM directly 
> for dg2. Would that blow up somewhere else?

It uses i915_gem_object_create_stolen_for_preallocated() which assumes
the stuff is inside stolen.

> > The fact that we fail to confirm any of that on integrated
> > parts has always bugged me, but not enough to actually do
> > anything about it. Such a check would be somewhat more involved
> > since we'd have to look at the PTEs. But on discrete sounds like
> > we can get away with a trivial check.
> 
> Which PTEs?

The PTEs the plane is actually using. We have no idea where they
actually point to and just assume they represent a 1:1 mapping of
stolen.

I suppose with lmem we'll just start assuming a 1:1 mapping of
the whole lmem rather than just stolen.

-- 
Ville Syrjälä
Intel


Re: [Intel-gfx] [PATCH v3 i-g-t] lib/intel_mmio: Fix mmapped resources not unmapped on fini

2022-03-07 Thread Kamil Konieczny
Hi Janusz,

Dnia 2022-03-07 at 16:06:10 +0100, Janusz Krzysztofik napisał(a):
> Hi Kamil,
> 
> On Monday, 7 March 2022 14:23:30 CET Kamil Konieczny wrote:
> > Hi Janusz,
> > 
> > Dnia 2022-03-07 at 09:26:43 +0100, Janusz Krzysztofik napisał(a):
> > > Commit 5f3cfa485eb4 ("lib: Use safe wrappers around libpciaccess
> > > initialization functions") took care of not leaking memory allocated by
> > > pci_system_init() but didn't take care of users potentially attempting to
> > > reinitialize global data maintained by libpciaccess.  For example,
> > > intel_register_access_init() mmaps device's PCI BAR0 resource with
> > > pci_device_map_range() but intel_register_access_fini() doesn't unmap it
> > > and next call to intel_register_access_init() fails on attempt to mmap it
> > > again.
> > > 
> > > Fix it, and also provide intel_mmio_unmap_*() counterparts to public
> > > functions intel_mmio_use_pci_bar() and intel_mmio_use_dump_file().
> > > 
> > > v2: apply last minute fixes, cached but unfortunately not committed before
> > > sending
> > > v3: use .pci_device_id field content as an indicator of arg initialization
> > > via intel_register_access_init(),
> > >   - improve checks of argument initialization status,
> > >   - shorten warning messages (Kamil),
> > >   - don't fill .mmio_size field until initialization succeeds (Kamil)
> > > 
> > > Signed-off-by: Janusz Krzysztofik 
> > > Cc: Kamil Konieczny 
> > > ---
> > >  lib/intel_io.h   |  4 +++
> > >  lib/intel_mmio.c | 64 +---
> > >  2 files changed, 65 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/lib/intel_io.h b/lib/intel_io.h
> > > index 1cfe4fb6b9..ea2649d9bc 100644
> > > --- a/lib/intel_io.h
> > > +++ b/lib/intel_io.h
> > > @@ -49,6 +49,8 @@ struct intel_register_map {
> > >  
> > >  struct intel_mmio_data {
> > >   void *igt_mmio;
> > > + size_t mmio_size;
> > > + struct pci_device *dev;
> > >   struct intel_register_map map;
> > >   uint32_t pci_device_id;
> > >   int key;
> > > @@ -57,7 +59,9 @@ struct intel_mmio_data {
> > >  
> > >  void intel_mmio_use_pci_bar(struct intel_mmio_data *mmio_data,
> > >   struct pci_device *pci_dev);
> > > +void intel_mmio_unmap_pci_bar(struct intel_mmio_data *mmio_data);
> > >  void intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, char 
> > > *file);
> > > +void intel_mmio_unmap_dump_file(struct intel_mmio_data *mmio_data);
> > >  
> > >  int intel_register_access_init(struct intel_mmio_data *mmio_data,
> > >  struct pci_device *pci_dev, int safe, int fd);
> > > diff --git a/lib/intel_mmio.c b/lib/intel_mmio.c
> > > index 667a69f5aa..d6ce0ee3ea 100644
> > > --- a/lib/intel_mmio.c
> > > +++ b/lib/intel_mmio.c
> > > @@ -82,6 +82,8 @@ void *igt_global_mmio;
> > >   * Sets also up mmio_data->igt_mmio to point at the data contained
> > >   * in @file. This allows the same code to get reused for dumping and 
> > > decoding
> > >   * from running hardware as from register dumps.
> > > + *
> > > + * Users are expected to call intel_mmio_unmap_dump_file() after use.
> > >   */
> > >  void
> > >  intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, char *file)
> > > @@ -99,11 +101,32 @@ intel_mmio_use_dump_file(struct intel_mmio_data 
> > > *mmio_data, char *file)
> > >   igt_fail_on_f(mmio_data->igt_mmio == MAP_FAILED,
> > > "Couldn't mmap %s\n", file);
> > >  
> > > + mmio_data->mmio_size = st.st_size;
> > >   igt_global_mmio = mmio_data->igt_mmio;
> > >  
> > >   close(fd);
> > >  }
> > >  
> > > +/**
> > > + * intel_mmio_unmap_dump_file:
> > > + * @mmio_data:  mmio structure for IO operations
> > > + *
> > > + * Unmaps a dump file mmapped with intel_mmio_use_dump_file()
> > > + */
> > > +void intel_mmio_unmap_dump_file(struct intel_mmio_data *mmio_data)
> > > +{
> > > + if (igt_warn_on_f(mmio_data->dev,
> > > +   "test bug: arg initialized with 
> > > intel_mmio_use_pci_bar()\n"))
> > > + return;
> > 
> > Please add a global description about this kind of errors, this
> > one is for using unmap when mmio was mmap-ed from other mmap
> > type.
> 
> Can you please be more specific in what you mean by "global description of 
> this kind of errors"?  A more detailed warning?  A comment?  If the latter 
> then how would you like me to make it global?

Yes, I was thinking about comment at begin of file, but maybe
it is better to change warning message like below.

> 
> If you just don't like the reference to intel_mmio_use_pci_bar() here then 
> would you be satisfied with something like "test bug: arg initialized by a 
> method other than intel_mmio_use_dump_file()\n"?

Yes, this sounds good.

> 
> > > + if (igt_warn_on_f(!mmio_data->mmio_size,
> > > +   "test bug: arg not initialized\n"))
> > > + return;
> > 
> > Can we replace this with one check igt_global_mmio != NULL ?
> > Something like:
> > 
> > if 

[Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915/gtt: reduce overzealous alignment constraints for GGTT (rev3)

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/i915/gtt: reduce overzealous alignment constraints for GGTT (rev3)
URL   : https://patchwork.freedesktop.org/series/100991/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11332_full -> Patchwork_22497_full


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_22497_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_22497_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Participating hosts (13 -> 13)
--

  No changes in participating hosts

Possible new issues
---

  Here are the unknown changes that may have been introduced in 
Patchwork_22497_full:

### IGT changes ###

 Possible regressions 

  * igt@gem_softpin@safe-alignment:
- shard-glk:  NOTRUN -> [DMESG-WARN][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-glk3/igt@gem_soft...@safe-alignment.html

  * 
igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-b-edp-1-scaler-with-clipping-clamping:
- shard-iclb: [PASS][2] -> [INCOMPLETE][3]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-iclb5/igt@kms_plane_scaling@scaler-with-clipping-clamp...@pipe-b-edp-1-scaler-with-clipping-clamping.html
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-iclb2/igt@kms_plane_scaling@scaler-with-clipping-clamp...@pipe-b-edp-1-scaler-with-clipping-clamping.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_pm_lpsp@screens-disabled:
- {shard-rkl}:[SKIP][4] ([i915#1902]) -> ([INCOMPLETE][5], 
[SKIP][6]) ([i915#1902])
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-rkl-6/igt@i915_pm_l...@screens-disabled.html
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-rkl-5/igt@i915_pm_l...@screens-disabled.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-rkl-4/igt@i915_pm_l...@screens-disabled.html

  
Known issues


  Here are the changes found in Patchwork_22497_full that come from known 
issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_fair@basic-deadline:
- shard-skl:  NOTRUN -> [FAIL][7] ([i915#2846])
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-skl6/igt@gem_exec_f...@basic-deadline.html

  * igt@gem_exec_fair@basic-none-solo@rcs0:
- shard-kbl:  NOTRUN -> [FAIL][8] ([i915#2842])
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-kbl3/igt@gem_exec_fair@basic-none-s...@rcs0.html

  * igt@gem_exec_fair@basic-none-vip@rcs0:
- shard-kbl:  [PASS][9] -> [FAIL][10] ([i915#2842]) +2 similar 
issues
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-kbl7/igt@gem_exec_fair@basic-none-...@rcs0.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-kbl4/igt@gem_exec_fair@basic-none-...@rcs0.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
- shard-tglb: [PASS][11] -> [FAIL][12] ([i915#2842])
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/shard-tglb2/igt@gem_exec_fair@basic-pace-sh...@rcs0.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-tglb2/igt@gem_exec_fair@basic-pace-sh...@rcs0.html

  * igt@gem_exec_params@secure-non-root:
- shard-iclb: NOTRUN -> [SKIP][13] ([fdo#112283])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-iclb7/igt@gem_exec_par...@secure-non-root.html

  * igt@gem_huc_copy@huc-copy:
- shard-skl:  NOTRUN -> [SKIP][14] ([fdo#109271] / [i915#2190])
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-skl10/igt@gem_huc_c...@huc-copy.html

  * igt@gem_lmem_swapping@heavy-verify-multi:
- shard-kbl:  NOTRUN -> [SKIP][15] ([fdo#109271] / [i915#4613])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-kbl3/igt@gem_lmem_swapp...@heavy-verify-multi.html

  * igt@gem_lmem_swapping@verify:
- shard-apl:  NOTRUN -> [SKIP][16] ([fdo#109271] / [i915#4613]) +1 
similar issue
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-apl8/igt@gem_lmem_swapp...@verify.html

  * igt@gem_lmem_swapping@verify-random:
- shard-iclb: NOTRUN -> [SKIP][17] ([i915#4613])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-iclb7/igt@gem_lmem_swapp...@verify-random.html

  * igt@gem_media_vme:
- shard-skl:  NOTRUN -> [SKIP][18] ([fdo#109271]) +132 similar 
issues
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/shard-skl10/igt@gem_media_vme.html

  * 

Re: [Intel-gfx] [PATCH v12 1/6] drm: Add arch arm64 for drm_clflush_virt_range

2022-03-07 Thread Michael Cheng

Ah Thanks for the great feedback!

@Lucas or @Matt, could you please chime in?

Michael Cheng

On 2022-03-02 11:10 a.m., Robin Murphy wrote:

On 2022-03-02 15:55, Michael Cheng wrote:

Thanks for the feedback Robin!

Sorry, my choice of words wasn't that great, but what I meant is to 
understand how ARM flushes a range of dcache for device drivers, and 
not an exact equivalent of x86 clflush.


I believe the concern is if the CPU writes an update, that update 
might only be sitting in the CPU cache and never make it to device 
memory where the device can see it; there are specific places that we 
are supposed to flush the CPU caches to make sure our updates are 
visible to the hardware.


Ah, OK, if it's more about ordering, and it's actually write buffers 
rather than caches that you care about flushing, then we might be a 
lot safer, phew!


For a very simple overview, in a case where the device itself needs to 
observe memory writes in the correct order, e.g.:


data_descriptor.valid = 1;

clflush(&data_descriptor);

command_descriptor.data = &data_descriptor

writel(/* control register to read command to then read data */)

then dma_wmb() between the first two writes should be the right tool 
to ensure that the command does not observe the command update while 
the data update is still sat somewhere in a CPU write buffer.


If you want a slightly stronger notion that, at a given point, all 
prior writes have actually been issued and should now be visible 
(rather than just that they won't become visible in the wrong order 
whenever they do), then wmb() should suffice on arm64.


Note that with arm64 memory types, a Non-Cacheable mapping of DRAM 
for a non-coherent DMA mapping, or of VRAM in a prefetchable BAR, can 
still be write-buffered, so barriers still matter even when actual 
cache maintenance ops don't (and as before if you're trying to perform 
cache maintenance outside the DMA API then you've already lost 
anyway). MMIO registers should be mapped as Device memory via 
ioremap(), which is not bufferable, hence the barrier implicit in 
writel() effectively pushes out any prior buffered writes ahead of a 
register write, which is why we don't need to worry about this most of 
the time.


This is only a very rough overview, though, and I'm not familiar 
enough with x86 semantics, your hardware, or the exact use-case to be 
able to say whether barriers alone are anywhere near the right answer 
or not.


Robin.



+Matt Roper

Matt, Lucas, any feed back here?

On 2022-03-02 4:49 a.m., Robin Murphy wrote:

On 2022-02-25 19:27, Michael Cheng wrote:

Hi Robin,

[ +arm64 maintainers for their awareness, which would have been a 
good thing to do from the start ]


  * Thanks for adding the arm64 maintainer and sorry I didn't rope 
them

    in sooner.

Why does i915 need to ensure the CPU's instruction cache is 
coherent with its data cache? Is it a self-modifying driver?


  * Also thanks for pointing this out. Initially I was using
    dcache_clean_inval_poc, which seem to be the equivalently to what
    x86 is doing for dcache flushing, but it was giving me build 
errors

    since its not on the global list of kernel symbols. And after
    revisiting the documentation for caches_clean_inval_pou, it won't
    fly for what we are trying to do. Moving forward, what would 
you (or
    someone in the ARM community) suggest we do? Could it be 
possible to

    export dcache_clean_inval_poc as a global symbol?


Unlikely, unless something with a legitimate need for CPU-centric 
cache maintenance like kexec or CPU hotplug ever becomes modular.


In the case of a device driver, it's not even the basic issues of 
assuming to find direct equivalents to x86 semantics in other CPU 
architectures, or effectively reinventing parts of the DMA API, it's 
even bigger than that. Once you move from being integrated in a 
single vendor's system architecture to being on a discrete card, you 
fundamentally *no longer have any control over cache coherency*. 
Whether the host CPU architecture happens to be AArch64, RISC-V, or 
whatever doesn't really matter, you're at the mercy of 3rd-party 
PCIe and interconnect IP vendors, and SoC integrators. You'll find 
yourself in systems where PCIe simply cannot snoop any caches, where 
you'd better have the correct DMA API calls in place to have any 
hope of even the most basic functionality working properly; you'll 
find yourself in systems where even if the PCIe root complex claims 
to support No Snoop, your uncached traffic will still end up 
snooping stale data that got prefetched back into caches you thought 
you'd invalidated; you'll find yourself in systems where your memory 
attributes may or may not get forcibly rewritten by an IOMMU 
depending on the kernel config and/or command line.


It's not about simply finding a substitute for clflush, it's that 
the reasons you have for using clflush in the first place can no 
longer be assumed to be valid.


Robin.


On 2022-02-25 

Re: [Intel-gfx] [PATCH] drm: remove min_order BUG_ON check

2022-03-07 Thread Matthew Auld

On 07/03/2022 14:37, Arunpravin wrote:

place BUG_ON(order < min_order) outside do..while
loop as it fails Unigine Heaven benchmark.

Unigine Heaven has buffer allocation requests where, for example, the
required pages are 161 and the alignment request is 128. To allocate
the remaining 33 pages, the iteration continues to find the order
value, which is 5, and when that is compared with min_order = 7, the
BUG_ON() triggers. To avoid this problem, place the BUG_ON
check outside of the do..while loop.

Signed-off-by: Arunpravin 
---
  drivers/gpu/drm/drm_buddy.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 72f52f293249..ed94c56b720f 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -669,10 +669,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
order = fls(pages) - 1;
min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
  
+	BUG_ON(order < min_order);


Isn't the issue that we are allowing a size that is not aligned to the 
requested min_page_size? Should we not fix the caller(and throw a normal 
error here), or perhaps add the round_up() here instead?


i.e if someone does:

alloc_blocks(mm, 0, end, 4096, 1<<16, &blocks, flags);

This will still trigger the BUG_ON() even if we move it out of the loop, 
AFAICT.



+
do {
order = min(order, (unsigned int)fls(pages) - 1);
BUG_ON(order > mm->max_order);
-   BUG_ON(order < min_order);
  
  		do {

if (flags & DRM_BUDDY_RANGE_ALLOCATION)

base-commit: 8025c79350b90e5a8029234d433578f12abbae2b


Re: [Intel-gfx] [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread Christoph Hellwig
On Mon, Mar 07, 2022 at 03:29:35PM +0200, Jarkko Sakkinen wrote:
> So what would you suggest to sort out the issue? I'm happy to go with
> ioctl if nothing else is acceptable.

Plenty of drivers treat all mmaps as if MAP_POPULATE was specified,
typically by using (io_)remap_pfn_range.  Is there any reason to only
optionally have the pre-fault semantics for sgx?  If not this should
be really simple.  And if we have a real need for it to be optional
we'll just need to find a sane way to pass that information to ->mmap.


[Intel-gfx] ✓ Fi.CI.BAT: success for drm: remove min_order BUG_ON check

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm: remove min_order BUG_ON check
URL   : https://patchwork.freedesktop.org/series/101108/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11332 -> Patchwork_22502


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/index.html

Participating hosts (44 -> 40)
--

  Additional (1): fi-pnv-d510 
  Missing(5): shard-tglu fi-bsw-cyan shard-rkl shard-dg1 fi-bdw-samus 

Known issues


  Here are the changes found in Patchwork_22502 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_suspend@basic-s3:
- fi-skl-6600u:   NOTRUN -> [FAIL][1] ([i915#4547])
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/fi-skl-6600u/igt@gem_exec_susp...@basic-s3.html

  * igt@gem_exec_suspend@basic-s3@smem:
- fi-bdw-5557u:   [PASS][2] -> [INCOMPLETE][3] ([i915#146])
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-bdw-5557u/igt@gem_exec_suspend@basic...@smem.html
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/fi-bdw-5557u/igt@gem_exec_suspend@basic...@smem.html

  * igt@gem_huc_copy@huc-copy:
- fi-pnv-d510:NOTRUN -> [SKIP][4] ([fdo#109271]) +57 similar issues
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/fi-pnv-d510/igt@gem_huc_c...@huc-copy.html

  * igt@i915_selftest@live@hangcheck:
- fi-snb-2600:[PASS][5] -> [INCOMPLETE][6] ([i915#3921])
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-snb-2600/igt@i915_selftest@l...@hangcheck.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/fi-snb-2600/igt@i915_selftest@l...@hangcheck.html

  
 Possible fixes 

  * igt@i915_selftest@live@gt_heartbeat:
- {fi-tgl-dsi}:   [DMESG-FAIL][7] ([i915#541]) -> [PASS][8]
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-tgl-dsi/igt@i915_selftest@live@gt_heartbeat.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/fi-tgl-dsi/igt@i915_selftest@live@gt_heartbeat.html

  * igt@i915_selftest@live@hangcheck:
- bat-dg1-6:  [DMESG-FAIL][9] ([i915#4957]) -> [PASS][10]
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-dg1-6/igt@i915_selftest@l...@hangcheck.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/bat-dg1-6/igt@i915_selftest@l...@hangcheck.html
- bat-dg1-5:  [DMESG-FAIL][11] ([i915#4494] / [i915#4957]) -> 
[PASS][12]
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-dg1-5/igt@i915_selftest@l...@hangcheck.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/bat-dg1-5/igt@i915_selftest@l...@hangcheck.html

  * igt@i915_selftest@live@requests:
- {bat-rpls-2}:   [DMESG-FAIL][13] ([i915#5087]) -> [PASS][14]
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-rpls-2/igt@i915_selftest@l...@requests.html
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/bat-rpls-2/igt@i915_selftest@l...@requests.html

  * igt@kms_flip@basic-flip-vs-modeset@a-edp1:
- {bat-adlp-6}:   [DMESG-WARN][15] ([i915#3576]) -> [PASS][16]
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-adlp-6/igt@kms_flip@basic-flip-vs-mode...@a-edp1.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22502/bat-adlp-6/igt@kms_flip@basic-flip-vs-mode...@a-edp1.html

  
  {name}: This element is suppressed. This means it is ignored when computing
  the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#146]: https://gitlab.freedesktop.org/drm/intel/issues/146
  [i915#3576]: https://gitlab.freedesktop.org/drm/intel/issues/3576
  [i915#3921]: https://gitlab.freedesktop.org/drm/intel/issues/3921
  [i915#4391]: https://gitlab.freedesktop.org/drm/intel/issues/4391
  [i915#4494]: https://gitlab.freedesktop.org/drm/intel/issues/4494
  [i915#4547]: https://gitlab.freedesktop.org/drm/intel/issues/4547
  [i915#4957]: https://gitlab.freedesktop.org/drm/intel/issues/4957
  [i915#5087]: https://gitlab.freedesktop.org/drm/intel/issues/5087
  [i915#5193]: https://gitlab.freedesktop.org/drm/intel/issues/5193
  [i915#541]: https://gitlab.freedesktop.org/drm/intel/issues/541


Build changes
-

  * Linux: CI_DRM_11332 -> Patchwork_22502

  CI-20190529: 20190529
  CI_DRM_11332: 8025c79350b90e5a8029234d433578f12abbae2b @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6366: 8c0bb07b7b4d9b724a897a7665fb9b1c450b2cb6 @ 
https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_22502: 54fbfb3df283ade8052a289018823dac3676bf44 @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

54fbfb3df283 drm: remove 

Re: [Intel-gfx] [PATCH v3 4/6] drm/i915/gem: Add extra pages in ttm_tt for ccs data

2022-03-07 Thread Matthew Auld

On 07/03/2022 13:40, Ramalingam C wrote:

On Xe-HP and later devices, dedicated compression control state (CCS)
stored in local memory is used for each surface, to support the
3D and media compression formats.

The memory required for the CCS of the entire local memory is 1/256 of
the local memory size. So before the kernel boot, the required memory
is reserved for the CCS data and a secure register will be programmed
with the CCS base address

So when an object is allocated in local memory, we don't need to explicitly
allocate the space for the ccs data. But when the obj is evicted into
smem, extra space is needed in smem to hold the compression related data
along with the obj, i.e. obj_size + (obj_size/256).

Hence when smem pages are allocated for an obj with an lmem placement
possibility, we create them with the extra pages required for the ccs data
for the obj size.

v2:
   Used imperative wording [Thomas]

Signed-off-by: Ramalingam C 
cc: Christian Koenig 
cc: Hellstrom Thomas 
Reviewed-by: Thomas Hellström 
---
  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 23 ++-
  1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 1a8262f5f692..c7a36861c38d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -20,6 +20,7 @@
  #include "gem/i915_gem_ttm.h"
  #include "gem/i915_gem_ttm_move.h"
  #include "gem/i915_gem_ttm_pm.h"
+#include "gt/intel_gpu_commands.h"
  
  #define I915_TTM_PRIO_PURGE 0

  #define I915_TTM_PRIO_NO_PAGES  1
@@ -255,12 +256,27 @@ static const struct i915_refct_sgt_ops tt_rsgt_ops = {
.release = i915_ttm_tt_release
  };
  
+static inline bool

+i915_gem_object_has_lmem_placement(struct drm_i915_gem_object *obj)
+{
+   int i;
+
+   for (i = 0; i < obj->mm.n_placements; i++)
+   if (obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL)
+   return true;
+
+   return false;
+}
+
  static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 uint32_t page_flags)
  {
+   struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+bdev);
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+   unsigned long ccs_pages = 0;
enum ttm_caching caching;
struct i915_ttm_tt *i915_tt;
int ret;
@@ -283,7 +299,12 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
i915_tt->is_shmem = true;
}
  
-	ret = ttm_tt_init(_tt->ttm, bo, page_flags, caching, 0);

+   if (HAS_FLAT_CCS(i915) && i915_gem_object_has_lmem_placement(obj))
+   ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size,
+ NUM_BYTES_PER_CCS_BYTE),
+PAGE_SIZE);


Did you figure out how to handle the case where we have LMEM + SMEM, and 
are unable to place the object into LMEM, and then it just ends up being 
kept in SMEM? AFAIK the vm.insert_entries code has always just assumed 
that the vma sg_table size is the same as the vma->size, and so will 
happily create PTEs for the hidden ccs page(s), which might corrupt the 
user's vm, depending on the exact layout.


Also it looks like the _shmem_writeback() call should now use 
ttm_tt->num_pages, instead of the object size?



+
+   ret = ttm_tt_init(_tt->ttm, bo, page_flags, caching, ccs_pages);
if (ret)
goto err_free;
  


Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Dave Hansen
On 3/7/22 03:27, Jarkko Sakkinen wrote:
> But e.g. in __mm_populate() anything with (VM_IO | VM_PFNMAP) gets
> filtered out and never reach that function.
> 
> I don't know how unorthodox that'd be, but could we perhaps have a VM
> flag for SGX?

SGX only works on a subset of the chips from one vendor on one
architecture.  That doesn't seem worth burning a VM flag.


Re: [Intel-gfx] [PATCH 0/6] Remove usage of list iterator past the loop body

2022-03-07 Thread David Laight
From: Dan Carpenter
> Sent: 07 March 2022 15:01
> 
> Updating this API is risky because some places rely on the old behavior
> and not all of them have been updated.  Here are some additional places
> you might want to change.

I really can't help thinking that trying to merge this patch is
actually impossible.
It affects far too many different parts of the tree.

Since (I believe) this is a doubly linked list with forwards and
backwards pointers that point to a 'node' (not that there is a
nice comment to that effect in the header - and there are lots of
ways to do linked lists) the 'head' pretty much has to be a 'node'.

I'd write the following new defines (but I might be using
the old names here):

list_first(head, field) First item, NULL if empty.
list_last(head, field) Last item NULL if empty.
list_next(head, item, field) Item after 'item', NULL if last.
list_prev(head, item. field) Item before 'item', NULL if first.

You get (something like):
#define list_first(head, field) \
head->next ==  ? NULL : list_item(head->next, field)
(probably needs typeof(item) from somewhere).

The iterator loop is then just:
#define loop_iterate(item, head, field) \
for (item = list_first(head, field); item; \
item = list_next(head, item, field)

I'm not sure, but making the 'head' be a structure that contains
a single member that is a 'node' might help type checking.

Then all the code that uses the current defines can slowly be
moved over (probably a couple of releases) before the existing
defines are deleted.

That should simplify all the open-coded search loops that are
just as likely to be buggy (possibly more so).

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)



Re: [Intel-gfx] [PATCH v3 i-g-t] lib/intel_mmio: Fix mmapped resources not unmapped on fini

2022-03-07 Thread Janusz Krzysztofik
Hi Kamil,

On Monday, 7 March 2022 14:23:30 CET Kamil Konieczny wrote:
> Hi Janusz,
> 
> Dnia 2022-03-07 at 09:26:43 +0100, Janusz Krzysztofik napisał(a):
> > Commit 5f3cfa485eb4 ("lib: Use safe wrappers around libpciaccess
> > initialization functions") took care of not leaking memory allocated by
> > pci_system_init() but didn't take care of users potentially attempting to
> > reinitialize global data maintained by libpciaccess.  For example,
> > intel_register_access_init() mmaps device's PCI BAR0 resource with
> > pci_device_map_range() but intel_register_access_fini() doesn't unmap it
> > and next call to intel_register_access_init() fails on attempt to mmap it
> > again.
> > 
> > Fix it, and also provide intel_mmio_unmap_*() counterparts to public
> > functions intel_mmio_use_pci_bar() and intel_mmio_use_dump_file().
> > 
> > v2: apply last minute fixes, cached but unfortunately not committed before
> > sending
> > v3: use .pci_device_id field content as an indicator of arg initialization
> > via intel_register_access_init(),
> >   - improve checks of argument initialization status,
> >   - shorten warning messages (Kamil),
> >   - don't fill .mmio_size field until initialization succeeds (Kamil)
> > 
> > Signed-off-by: Janusz Krzysztofik 
> > Cc: Kamil Konieczny 
> > ---
> >  lib/intel_io.h   |  4 +++
> >  lib/intel_mmio.c | 64 +---
> >  2 files changed, 65 insertions(+), 3 deletions(-)
> > 
> > diff --git a/lib/intel_io.h b/lib/intel_io.h
> > index 1cfe4fb6b9..ea2649d9bc 100644
> > --- a/lib/intel_io.h
> > +++ b/lib/intel_io.h
> > @@ -49,6 +49,8 @@ struct intel_register_map {
> >  
> >  struct intel_mmio_data {
> > void *igt_mmio;
> > +   size_t mmio_size;
> > +   struct pci_device *dev;
> > struct intel_register_map map;
> > uint32_t pci_device_id;
> > int key;
> > @@ -57,7 +59,9 @@ struct intel_mmio_data {
> >  
> >  void intel_mmio_use_pci_bar(struct intel_mmio_data *mmio_data,
> > struct pci_device *pci_dev);
> > +void intel_mmio_unmap_pci_bar(struct intel_mmio_data *mmio_data);
> >  void intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, char 
> > *file);
> > +void intel_mmio_unmap_dump_file(struct intel_mmio_data *mmio_data);
> >  
> >  int intel_register_access_init(struct intel_mmio_data *mmio_data,
> >struct pci_device *pci_dev, int safe, int fd);
> > diff --git a/lib/intel_mmio.c b/lib/intel_mmio.c
> > index 667a69f5aa..d6ce0ee3ea 100644
> > --- a/lib/intel_mmio.c
> > +++ b/lib/intel_mmio.c
> > @@ -82,6 +82,8 @@ void *igt_global_mmio;
> >   * Sets also up mmio_data->igt_mmio to point at the data contained
> >   * in @file. This allows the same code to get reused for dumping and 
> > decoding
> >   * from running hardware as from register dumps.
> > + *
> > + * Users are expected to call intel_mmio_unmap_dump_file() after use.
> >   */
> >  void
> >  intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, char *file)
> > @@ -99,11 +101,32 @@ intel_mmio_use_dump_file(struct intel_mmio_data 
> > *mmio_data, char *file)
> > igt_fail_on_f(mmio_data->igt_mmio == MAP_FAILED,
> >   "Couldn't mmap %s\n", file);
> >  
> > +   mmio_data->mmio_size = st.st_size;
> > igt_global_mmio = mmio_data->igt_mmio;
> >  
> > close(fd);
> >  }
> >  
> > +/**
> > + * intel_mmio_unmap_dump_file:
> > + * @mmio_data:  mmio structure for IO operations
> > + *
> > + * Unmaps a dump file mmapped with intel_mmio_use_dump_file()
> > + */
> > +void intel_mmio_unmap_dump_file(struct intel_mmio_data *mmio_data)
> > +{
> > +   if (igt_warn_on_f(mmio_data->dev,
> > + "test bug: arg initialized with 
> > intel_mmio_use_pci_bar()\n"))
> > +   return;
> 
> Please add a global description about this kind of errors, this
> one is for using unmap when mmio was mmap-ed from other mmap
> type.

Can you please be more specific in what you mean by "global description of 
this kind of errors"?  A more detailed warning?  A comment?  If the latter 
then how would you like me to make it global?

If you just don't like the reference to intel_mmio_use_pci_bar() here then 
would you be satisfied with something like "test bug: arg initialized by a 
method other than intel_mmio_use_dump_file()\n"?

> > +   if (igt_warn_on_f(!mmio_data->mmio_size,
> > + "test bug: arg not initialized\n"))
> > +   return;
> 
> Can we replace this with one check igt_global_mmio != NULL ?
> Something like:
> 
>   if (igt_warn_on_f(!igt_global_mmio,
> "mmio regs not mmap-ed\n"))
>   return;
> 
> Or should we add this before all other checks in unmap functions
> and keep this additional check ?

Why igt_global_mmio again?  I still think this global variable is broken and 
users should just use the structure they pass to intel_mmio_use_*() or 
intel_register_access_init(), since introducing another dependency on 

[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915/ttm: Evict and restore of compressed object

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/i915/ttm: Evict and restore of compressed object
URL   : https://patchwork.freedesktop.org/series/101106/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11332 -> Patchwork_22501


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_22501 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_22501, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/index.html

Participating hosts (44 -> 39)
--

  Additional (1): fi-tgl-1115g4 
  Missing(6): shard-tglu fi-bsw-cyan shard-rkl shard-dg1 bat-jsl-2 
fi-bdw-samus 

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_22501:

### IGT changes ###

 Possible regressions 

  * igt@i915_selftest@live@migrate:
- fi-bsw-kefka:   [PASS][1] -> [DMESG-FAIL][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-bsw-kefka/igt@i915_selftest@l...@migrate.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-bsw-kefka/igt@i915_selftest@l...@migrate.html
- fi-kbl-8809g:   [PASS][3] -> [DMESG-FAIL][4]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-kbl-8809g/igt@i915_selftest@l...@migrate.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-kbl-8809g/igt@i915_selftest@l...@migrate.html
- fi-kbl-x1275:   [PASS][5] -> [DMESG-FAIL][6]
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-kbl-x1275/igt@i915_selftest@l...@migrate.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-kbl-x1275/igt@i915_selftest@l...@migrate.html
- fi-rkl-guc: [PASS][7] -> [DMESG-FAIL][8]
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-rkl-guc/igt@i915_selftest@l...@migrate.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-rkl-guc/igt@i915_selftest@l...@migrate.html
- fi-skl-6700k2:  [PASS][9] -> [DMESG-FAIL][10]
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-skl-6700k2/igt@i915_selftest@l...@migrate.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-skl-6700k2/igt@i915_selftest@l...@migrate.html
- fi-cfl-guc: [PASS][11] -> [DMESG-FAIL][12]
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-cfl-guc/igt@i915_selftest@l...@migrate.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-cfl-guc/igt@i915_selftest@l...@migrate.html
- fi-bsw-n3050:   [PASS][13] -> [DMESG-FAIL][14]
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-bsw-n3050/igt@i915_selftest@l...@migrate.html
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-bsw-n3050/igt@i915_selftest@l...@migrate.html
- fi-cfl-8700k:   [PASS][15] -> [DMESG-FAIL][16]
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-cfl-8700k/igt@i915_selftest@l...@migrate.html
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-cfl-8700k/igt@i915_selftest@l...@migrate.html
- fi-bxt-dsi: [PASS][17] -> [DMESG-FAIL][18]
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-bxt-dsi/igt@i915_selftest@l...@migrate.html
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-bxt-dsi/igt@i915_selftest@l...@migrate.html
- fi-cfl-8109u:   [PASS][19] -> [DMESG-FAIL][20]
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-cfl-8109u/igt@i915_selftest@l...@migrate.html
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-cfl-8109u/igt@i915_selftest@l...@migrate.html
- fi-glk-j4005:   [PASS][21] -> [DMESG-FAIL][22]
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-glk-j4005/igt@i915_selftest@l...@migrate.html
   [22]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-glk-j4005/igt@i915_selftest@l...@migrate.html
- bat-dg1-5:  [PASS][23] -> [DMESG-FAIL][24]
   [23]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-dg1-5/igt@i915_selftest@l...@migrate.html
   [24]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/bat-dg1-5/igt@i915_selftest@l...@migrate.html
- fi-kbl-7567u:   [PASS][25] -> [DMESG-FAIL][26]
   [25]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-kbl-7567u/igt@i915_selftest@l...@migrate.html
   [26]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22501/fi-kbl-7567u/igt@i915_selftest@l...@migrate.html
- fi-kbl-7500u:   [PASS][27] -> [DMESG-FAIL][28]
   [27]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-kbl-7500u/igt@i915_selftest@l...@migrate.html
   [28]: 

[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm: remove min_order BUG_ON check

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm: remove min_order BUG_ON check
URL   : https://patchwork.freedesktop.org/series/101108/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
54fbfb3df283 drm: remove min_order BUG_ON check
-:27: WARNING:AVOID_BUG: Avoid crashing the kernel - try using WARN_ON & 
recovery code rather than BUG() or BUG_ON()
#27: FILE: drivers/gpu/drm/drm_buddy.c:672:
+   BUG_ON(order < min_order);

total: 0 errors, 1 warnings, 0 checks, 12 lines checked




Re: [Intel-gfx] [PATCH 0/6] Remove usage of list iterator past the loop body

2022-03-07 Thread Dan Carpenter
Updating this API is risky because some places rely on the old behavior
and not all of them have been updated.  Here are some additional places
you might want to change.

drivers/usb/host/uhci-q.c:466 link_async() warn: iterator used outside loop: 
'pqh'
drivers/infiniband/core/mad.c:968 ib_get_rmpp_segment() warn: iterator used 
outside loop: 'mad_send_wr->cur_seg'
drivers/opp/debugfs.c:208 opp_migrate_dentry() warn: iterator used outside 
loop: 'new_dev'
drivers/staging/greybus/audio_codec.c:602 gbcodec_mute_stream() warn: iterator 
used outside loop: 'module'
drivers/staging/media/atomisp/pci/atomisp_acc.c:508 
atomisp_acc_load_extensions() warn: iterator used outside loop: 'acc_fw'
drivers/perf/thunderx2_pmu.c:814 tx2_uncore_pmu_init_dev() warn: iterator used 
outside loop: 'rentry'
drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c:111 
nvkm_control_mthd_pstate_attr() warn: iterator used outside loop: 'pstate'
drivers/gpu/drm/panfrost/panfrost_mmu.c:203 panfrost_mmu_as_get() warn: 
iterator used outside loop: 'lru_mmu'
drivers/media/usb/uvc/uvc_v4l2.c:885 uvc_ioctl_enum_input() warn: iterator used 
outside loop: 'iterm'
drivers/media/usb/uvc/uvc_v4l2.c:896 uvc_ioctl_enum_input() warn: iterator used 
outside loop: 'iterm'
drivers/scsi/dc395x.c:3596 device_alloc() warn: iterator used outside loop: 'p'
drivers/net/ethernet/mellanox/mlx4/alloc.c:379 __mlx4_alloc_from_zone() warn: 
iterator used outside loop: 'curr_node'
fs/ocfs2/dlm/dlmdebug.c:573 lockres_seq_start() warn: iterator used outside 
loop: 'res'

This patchset fixes 3 bugs.  Initially when it's merged it's probably
going to introduce some bugs because there are likely other places which
rely on the old behavior.

In an ideal world, with the new API the compiler would warn about
uninitialized variables, but unfortunately that warning is disabled by
default so we still have to rely on kbuild/Clang/Smatch to find the
bugs.

But hopefully the new API encourages people to write clearer code so it
prevents bugs in the long run.

regards,
dan carpenter



Re: [Intel-gfx] [PATCH] drm: remove min_order BUG_ON check

2022-03-07 Thread Jani Nikula
On Mon, 07 Mar 2022, Arunpravin  wrote:
> place BUG_ON(order < min_order) outside do..while
> loop as it fails Unigine Heaven benchmark.
>
> Unigine Heaven has buffer allocation requests for
> example required pages are 161 and alignment request
> is 128. To allocate the remaining 33 pages, continues
> the iteration to find the order value which is 5 and
> when it compares with min_order = 7, enables the
> BUG_ON(). To avoid this problem, placed the BUG_ON
> check outside of do..while loop.

How about turning these BUG_ON()s to WARN_ON()s with an error return?
What's the point in oopsing?

BR,
Jani.


>
> Signed-off-by: Arunpravin 
> ---
>  drivers/gpu/drm/drm_buddy.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 72f52f293249..ed94c56b720f 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -669,10 +669,11 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>   order = fls(pages) - 1;
>   min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>  
> + BUG_ON(order < min_order);
> +
>   do {
>   order = min(order, (unsigned int)fls(pages) - 1);
>   BUG_ON(order > mm->max_order);
> - BUG_ON(order < min_order);
>  
>   do {
>   if (flags & DRM_BUDDY_RANGE_ALLOCATION)
>
> base-commit: 8025c79350b90e5a8029234d433578f12abbae2b

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Matthew Wilcox
On Sun, Mar 06, 2022 at 03:41:54PM -0800, Dave Hansen wrote:
> In short: page faults stink.  The core kernel has lots of ways of
> avoiding page faults like madvise(MADV_WILLNEED) or mmap(MAP_POPULATE).
>  But, those only work on normal RAM that the core mm manages.
> 
> SGX is weird.  SGX memory is managed outside the core mm.  It doesn't
> have a 'struct page' and get_user_pages() doesn't work on it.  Its VMAs
> are marked with VM_IO.  So, none of the existing methods for avoiding
> page faults work on SGX memory.
> 
> This essentially helps extend existing "normal RAM" kernel ABIs to work
> for avoiding faults for SGX too.  SGX users want to enjoy all of the
> benefits of a delayed allocation policy (better resource use,
> overcommit, NUMA affinity) but without the cost of millions of faults.

We have a mechanism for dynamically reducing the number of page faults
already; it's just buried in the page cache code.  You have vma->vm_file,
which contains a file_ra_state.  You can use this to track where
recent faults have been and grow the size of the region you fault in
per page fault.  You don't have to (indeed probably don't want to) use
the same algorithm as the page cache, but the _principle_ is the same --
were recent speculative faults actually used; should we grow the number
of pages actually faulted in, or is this a random sparse workload where
we want to allocate individual pages.

Don't rely on the user to ask.  They don't know.


Re: [Intel-gfx] [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread David Hildenbrand
On 07.03.22 15:22, Jarkko Sakkinen wrote:
> On Mon, Mar 07, 2022 at 11:12:44AM +0100, David Hildenbrand wrote:
>> On 06.03.22 06:32, Jarkko Sakkinen wrote:
>>> For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
>>> to use that for initializing the device memory by providing a new callback
>>> f_ops->populate() for the purpose.
>>>
>>> SGX patches are provided to show the callback in context.
>>>
>>> An obvious alternative is a ioctl but it is less elegant and requires
>>> two syscalls (mmap + ioctl) per memory range, instead of just one
>>> (mmap).
>>
>> What about extending MADV_POPULATE_READ | MADV_POPULATE_WRITE to support
>> VM_IO | VM_PFNMAP (as well?) ?
> 
> What would be a proper point to bind that behaviour? For mmap/mprotect it'd
> be probably populate_vma_page_range() because that would span both mmap()
> and mprotect() (Dave's suggestion in this thread).

MADV_POPULATE_* ends up in faultin_vma_page_range(), right next to
populate_vma_page_range(). So it might require a similar way to hook
into the driver I guess.

> 
> For MAP_POPULATE I did not have hard proof to show that it would be used
> by other drivers but for madvice() you can find at least a few ioctl
> based implementations:
> 
> $ git grep -e madv --and \( -e ioc \)  drivers/
> drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:int i915_gem_madvise_ioctl(struct 
> drm_device *dev, void *data,
> drivers/gpu/drm/i915/i915_driver.c: DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, 
> i915_gem_madvise_ioctl, DRM_RENDER_ALLOW),
> drivers/gpu/drm/i915/i915_gem.c:i915_gem_madvise_ioctl(struct drm_device 
> *dev, void *data,
> drivers/gpu/drm/msm/msm_drv.c:static int msm_ioctl_gem_madvise(struct 
> drm_device *dev, void *data,
> drivers/gpu/drm/msm/msm_drv.c:  DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE,  
> msm_ioctl_gem_madvise,  DRM_RENDER_ALLOW),
> drivers/gpu/drm/panfrost/panfrost_drv.c:static int 
> panfrost_ioctl_madvise(struct drm_device *dev, void *data,
> drivers/gpu/drm/vc4/vc4_drv.c:  DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, 
> vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
> drivers/gpu/drm/vc4/vc4_drv.h:int vc4_gem_madvise_ioctl(struct drm_device 
> *dev, void *data,
> drivers/gpu/drm/vc4/vc4_gem.c:int vc4_gem_madvise_ioctl(struct drm_device 
> *dev, void *data,
> 
> IMHO this also provides supportive claim for MAP_POPULATE, and yeah, I
> agree that to be consistent implementation, both madvice() and MAP_POPULATE
> should work.

MADV_POPULATE_WRITE + MADV_DONTNEED/FALLOC_FL_PUNCH_HOLE is one way to
dynamically manage memory consumption inside a sparse memory mapping
(preallocate/populate via MADV_POPULATE_WRITE, discard via
MADV_DONTNEED/FALLOC_FL_PUNCH_HOLE).  Extending that whole mechanism to
deal with VM_IO | VM_PFNMAP mappings as well could be interesting.

At least I heard about some ideas where we might want to dynamically
expose memory to a VM (via virtio-mem) inside a sparse memory mapping,
and the memory in that sparse memory mapping is provided from a
dedicated memory pool managed by a device driver -- not just using
ordinary anonymous/file/hugetlb memory as we do right now.

Now, this is certainly stuff for the future, I just wanted to mention it.

-- 
Thanks,

David / dhildenb



[Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/ttm: Evict and restore of compressed object

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/i915/ttm: Evict and restore of compressed object
URL   : https://patchwork.freedesktop.org/series/101106/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.




Re: [Intel-gfx] [PATCH v3 2/6] drm/i915/gt: Clear compress metadata for Flat-ccs objects

2022-03-07 Thread Hellstrom, Thomas
On Mon, 2022-03-07 at 19:10 +0530, Ramalingam C wrote:
> Xe-HP and latest devices support Flat CCS which reserved a portion of
> the device memory to store compression metadata, during the clearing
> of
> device memory buffer object we also need to clear the associated
> CCS buffer.
> 
> XY_FAST_COLOR_BLT cmd provides a option to clear the ccs metadata
> corresponding to the main memory that is cleared. So on Flat-CCS
> capable
> platform we use this option to clear the CCS meta data along with
> main
> memory.
> 
> v2: Fixed issues with platform naming [Lucas]
> v3: Rebased [Ram]
>     Used the round_up funcs [Bob]
> v4: Fixed ccs blk calculation [Ram]
>     Added Kdoc on flat-ccs.
> v5: GENMASK is used [Matt]
>     mocs fix [Matt]
>     Comments Fix [Matt]
>     Flush address programming [Ram]
> v6: FLUSH_DW is fixed
>     Few coding style fix
> v7: Adopting the XY_FAST_COLOR_BLT (Thomas]
> 
> Signed-off-by: Ramalingam C 
> Signed-off-by: Ayaz A Siddiqui 
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 ++
>  drivers/gpu/drm/i915/gt/intel_migrate.c  | 39
> ++--
>  2 files changed, 39 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 925e55b6a94f..34cead49f35e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -207,8 +207,11 @@
>  #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
>  #define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
>  #define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
> +#define   FAST_CLEAR_0 (2 << 12)
>  #define   XY_FAST_COLOR_BLT_DW 16
>  #define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
> +#define   XY_FAST_COLOR_BLT_AUX_MASK   GENMASK(20, 18)
> +#define   XY_FAST_COLOR_BLT_AUX_CCS_E  5
>  #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
>  #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
>  #define GEN9_XY_FAST_COPY_BLT_CMD  (2 << 29 | 0x42 << 22)
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c
> b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index cb68f7bf6b28..05262f1b438e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -469,6 +469,34 @@ static bool wa_1209644611_applies(int ver, u32
> size)
> return height % 4 == 3 && height <= 8;
>  }
>  
> +/**
> + * DOC: Flat-CCS - Memory compression for Local memory
> + *
> + * On Xe-HP and later devices, we use dedicated compression control
> state (CCS)
> + * stored in local memory for each surface, to support the 3D and
> media
> + * compression formats.
> + *
> + * The memory required for the CCS of the entire local memory is
> 1/256 of the
> + * local memory size. So before the kernel boot, the required memory
> is reserved
> + * for the CCS data and a secure register will be programmed with
> the CCS base
> + * address.
> + *
> + * Flat CCS data needs to be cleared when a lmem object is
> allocated.
> + * And CCS data can be copied in and out of CCS region through
> + * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
> + *
> + * When we exhaust the lmem, if the object's placements support
> smem, then we can
> + * directly decompress the compressed lmem object into smem and
> start using it
> + * from smem itself.
> + *
> + * But when we need to swapout the compressed lmem object into a
> smem region
> + * though objects' placement doesn't support smem, then we copy the
> lmem content
> + * as it is into smem region along with ccs data (using
> XY_CTRL_SURF_COPY_BLT).
> + * When the object is referred, lmem content will be swaped in along
> with
> + * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at
> corresponding
> + * location.
> + */
> +
>  static int emit_copy(struct i915_request *rq,
>  u32 dst_offset, u32 src_offset, int size)
>  {
> @@ -621,8 +649,8 @@ static int emit_clear(struct i915_request *rq,
> u64 offset, int size,
>  {
> struct drm_i915_private *i915 = rq->engine->i915;
> int mocs = rq->engine->gt->mocs.uc_index << 1;
> +   u32 *cs, spl_mode = 0, aux = 0, mem_type = 0;
> const int ver = GRAPHICS_VER(i915);
> -   u32 *cs, mem_type = 0;
> int ring_sz;
>  
> GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
> @@ -644,10 +672,15 @@ static int emit_clear(struct i915_request *rq,
> u64 offset, int size,
> return PTR_ERR(cs);
>  
> if (ver >= 12) {
> +   if (HAS_FLAT_CCS(i915)) {
> +   spl_mode = FAST_CLEAR_0;
> +   aux = FIELD_PREP(XY_FAST_COLOR_BLT_AUX_MASK,

Did you have a chance to verify that this actually works, and whether
setting aux will clear just the CCS data or both CCS & main DATA?

If so,
Reviewed-by: Thomas Hellström 



> +   
> XY_FAST_COLOR_BLT_AUX_CCS_E);
> +   }
>  

[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/ttm: Evict and restore of compressed object

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/i915/ttm: Evict and restore of compressed object
URL   : https://patchwork.freedesktop.org/series/101106/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
ca55d5422881 drm/i915/gt: Use XY_FASR_COLOR_BLT to clear obj on graphics ver 12+
ecd91714c6d2 drm/i915/gt: Clear compress metadata for Flat-ccs objects
f5878410b6ff drm/ttm: Add a parameter to add extra pages into ttm_tt
-:92: CHECK:PREFER_KERNEL_TYPES: Prefer kernel type 'u32' over 'uint32_t'
#92: FILE: drivers/gpu/drm/ttm/ttm_tt.c:150:
+   uint32_t page_flags, enum ttm_caching caching,

-:139: CHECK:PREFER_KERNEL_TYPES: Prefer kernel type 'u32' over 'uint32_t'
#139: FILE: include/drm/ttm/ttm_tt.h:151:
+   uint32_t page_flags, enum ttm_caching caching,

total: 0 errors, 0 warnings, 2 checks, 88 lines checked
7160c760c643 drm/i915/gem: Add extra pages in ttm_tt for ccs data
1f4b56203045 drm/i915/gt: Optimize the migration loop
68f3742dad6a drm/i915/migrate: Evict and restore the flatccs capable lmem obj
-:38: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#38: FILE: drivers/gpu/drm/i915/gt/intel_gpu_commands.h:156:
+#define   MI_FLUSH_DW_CCS  (1<<16)
  ^

-:41: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#41: FILE: drivers/gpu/drm/i915/gt/intel_gpu_commands.h:159:
+#define   MI_FLUSH_DW_LLC  (1<<9)
  ^

total: 0 errors, 0 warnings, 2 checks, 349 lines checked




Re: [Intel-gfx] [PATCH v3 1/6] drm/i915/gt: Use XY_FASR_COLOR_BLT to clear obj on graphics ver 12+

2022-03-07 Thread Hellstrom, Thomas
Hi, Ram.

Typo in patch title FASR/FAST

On Mon, 2022-03-07 at 19:10 +0530, Ramalingam C wrote:
> XY_FAST_COLOR_BLT cmd is faster than the older XY_COLOR_BLT. Hence
> for
> clearing (Zero out) the pages of the newly allocated object, faster
> cmd
> is used.
> 
> Signed-off-by: Ramalingam C 
> Signed-off-by: Chris Wilson 
> ---
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  5 ++
>  drivers/gpu/drm/i915/gt/intel_migrate.c  | 51 +-
> --
>  2 files changed, 49 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index d112ffd56418..925e55b6a94f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -205,6 +205,11 @@
>  
>  #define COLOR_BLT_CMD  (2 << 29 | 0x40 << 22 | (5 -
> 2))
>  #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
> +#define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
> +#define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
> +#define   XY_FAST_COLOR_BLT_DW 16
> +#define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
> +#define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
>  #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
>  #define GEN9_XY_FAST_COPY_BLT_CMD  (2 << 29 | 0x42 << 22)
>  #define XY_SRC_COPY_BLT_CMD(2 << 29 | 0x53 << 22)
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c
> b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index 20444d6ceb3c..cb68f7bf6b28 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -16,6 +16,8 @@ struct insert_pte_data {
>  };
>  
>  #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
> +#define GET_CCS_BYTES(i915, size)  (HAS_FLAT_CCS(i915) ? \
> +    DIV_ROUND_UP(size,
> NUM_BYTES_PER_CCS_BYTE) : 0)
>  
>  static bool engine_supports_migration(struct intel_engine_cs
> *engine)
>  {
> @@ -614,20 +616,56 @@ intel_context_migrate_copy(struct intel_context
> *ce,
> return err;
>  }
>  
> -static int emit_clear(struct i915_request *rq, u64 offset, int size,
> u32 value)
> +static int emit_clear(struct i915_request *rq, u64 offset, int size,
> + u32 value, bool is_lmem)
>  {
> -   const int ver = GRAPHICS_VER(rq->engine->i915);
> -   u32 *cs;
> +   struct drm_i915_private *i915 = rq->engine->i915;
> +   int mocs = rq->engine->gt->mocs.uc_index << 1;
> +   const int ver = GRAPHICS_VER(i915);
> +   u32 *cs, mem_type = 0;
> +   int ring_sz;
>  
> GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
>  
> offset += (u64)rq->engine->instance << 32;
>  
> -   cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
> +   if (ver >= 12)
> +   ring_sz = 16;

Noting that DG1 doesn't use more than 11 dwords? Doesn't matter much I
guess if we pad with NOP.

> +   else if (ver >= 8)
> +   ring_sz = 8;
> +   else
> +   ring_sz = 6;
> +
> +   if (!is_lmem)
> +   mem_type = 1 << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;

Should we use the MEM_TYPE macros so it becomes clearer what we're
doing? 

Also does DG1 support the mocs and mem_type fields? If not should we
set these to 0 for relevant hardware?

> +
> +   cs = intel_ring_begin(rq, ring_sz);
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>  
> -   if (ver >= 8) {
> +   if (ver >= 12) {
> +   *cs++ = XY_FAST_COLOR_BLT_CMD |
> XY_FAST_COLOR_BLT_DEPTH_32 |
> +   (XY_FAST_COLOR_BLT_DW - 2);
> +   *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs)
> |
> +   (PAGE_SIZE - 1);
> +   *cs++ = 0;
> +   *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
> +   *cs++ = lower_32_bits(offset);
> +   *cs++ = upper_32_bits(offset);
> +   *cs++ = mem_type;
> +   /* BG7 */
> +   *cs++ = value;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   /* BG11 */
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   /* BG13 */
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   *cs++ = 0;
> +   } else if (ver >= 8) {
> *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
> *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY |
> PAGE_SIZE;
> *cs++ = 0;
> @@ -645,7 +683,6 @@ static int emit_clear(struct i915_request *rq,
> u64 offset, int size, u32 value)
> *cs++ = lower_32_bits(offset);
> *cs++ = value;
> }
> -
> intel_ring_advance(rq, cs);
> return 0;
>  }
> @@ -711,7 +748,7 @@ intel_context_migrate_clear(struct intel_context
> *ce,
> if (err)
> goto out_rq;
>  
> -   err = 

[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: opportunistically apply ALLOC_CONTIGIOUS (rev2)

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/i915: opportunistically apply ALLOC_CONTIGIOUS (rev2)
URL   : https://patchwork.freedesktop.org/series/99631/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11332 -> Patchwork_22498


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/index.html

Participating hosts (44 -> 38)
--

  Additional (1): fi-pnv-d510 
  Missing(7): fi-kbl-soraka shard-tglu fi-bsw-cyan shard-rkl shard-dg1 
bat-jsl-2 fi-bdw-samus 

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_22498:

### IGT changes ###

 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live@gt_lrc:
- {bat-dg2-9}:NOTRUN -> [INCOMPLETE][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-dg2-9/igt@i915_selftest@live@gt_lrc.html

  * igt@kms_frontbuffer_tracking@basic:
- {bat-dg2-9}:NOTRUN -> [FAIL][2]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-dg2-9/igt@kms_frontbuffer_track...@basic.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
- {bat-dg2-9}:NOTRUN -> [DMESG-WARN][3]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-dg2-9/igt@kms_pipe_crc_ba...@suspend-read-crc-pipe-a.html

  * igt@prime_vgem@basic-write:
- {bat-dg2-9}:NOTRUN -> [SKIP][4] +16 similar issues
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-dg2-9/igt@prime_v...@basic-write.html

  
Known issues


  Here are the changes found in Patchwork_22498 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_suspend@basic-s3@smem:
- fi-skl-6600u:   [PASS][5] -> [INCOMPLETE][6] ([i915#4547])
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-skl-6600u/igt@gem_exec_suspend@basic...@smem.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/fi-skl-6600u/igt@gem_exec_suspend@basic...@smem.html
- fi-bdw-5557u:   [PASS][7] -> [INCOMPLETE][8] ([i915#146])
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-bdw-5557u/igt@gem_exec_suspend@basic...@smem.html
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/fi-bdw-5557u/igt@gem_exec_suspend@basic...@smem.html

  * igt@gem_huc_copy@huc-copy:
- fi-pnv-d510:NOTRUN -> [SKIP][9] ([fdo#109271]) +57 similar issues
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/fi-pnv-d510/igt@gem_huc_c...@huc-copy.html

  
 Possible fixes 

  * igt@i915_pm_rps@basic-api:
- bat-dg1-6:  [FAIL][10] ([i915#4032]) -> [PASS][11]
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-dg1-6/igt@i915_pm_...@basic-api.html
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-dg1-6/igt@i915_pm_...@basic-api.html

  * igt@i915_selftest@live@gt_heartbeat:
- {fi-tgl-dsi}:   [DMESG-FAIL][12] ([i915#541]) -> [PASS][13]
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-tgl-dsi/igt@i915_selftest@live@gt_heartbeat.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/fi-tgl-dsi/igt@i915_selftest@live@gt_heartbeat.html

  * igt@i915_selftest@live@requests:
- {bat-rpls-2}:   [DMESG-FAIL][14] ([i915#5087]) -> [PASS][15]
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-rpls-2/igt@i915_selftest@l...@requests.html
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-rpls-2/igt@i915_selftest@l...@requests.html

  * igt@kms_busy@basic@flip:
- {bat-adlp-6}:   [DMESG-WARN][16] ([i915#3576]) -> [PASS][17]
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-adlp-6/igt@kms_busy@ba...@flip.html
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-adlp-6/igt@kms_busy@ba...@flip.html
- {bat-dg2-9}:[DMESG-WARN][18] -> [PASS][19]
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-dg2-9/igt@kms_busy@ba...@flip.html
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-dg2-9/igt@kms_busy@ba...@flip.html

  
 Warnings 

  * igt@i915_selftest@live@hangcheck:
- bat-dg1-6:  [DMESG-FAIL][20] ([i915#4957]) -> [DMESG-FAIL][21] 
([i915#4494] / [i915#4957])
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/bat-dg1-6/igt@i915_selftest@l...@hangcheck.html
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22498/bat-dg1-6/igt@i915_selftest@l...@hangcheck.html

  
  {name}: This element is suppressed. This means it is ignored when computing
  the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
  

[Intel-gfx] ✗ Fi.CI.BUILD: failure for MAP_POPULATE for device memory

2022-03-07 Thread Patchwork
== Series Details ==

Series: MAP_POPULATE for device memory
URL   : https://patchwork.freedesktop.org/series/101099/
State : failure

== Summary ==

Applying: mm: Add f_ops->populate()
Applying: x86/sgx: Export sgx_encl_page_alloc()
error: sha1 information is lacking or useless (arch/x86/kernel/cpu/sgx/encl.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0002 x86/sgx: Export sgx_encl_page_alloc()
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".




[Intel-gfx] ✗ Fi.CI.BUILD: failure for mm: Add f_ops->populate() (rev3)

2022-03-07 Thread Patchwork
== Series Details ==

Series: mm: Add f_ops->populate() (rev3)
URL   : https://patchwork.freedesktop.org/series/101097/
State : failure

== Summary ==

CALLscripts/checksyscalls.sh
  CALLscripts/atomic/check-atomics.sh
  DESCEND objtool
  CHK include/generated/compile.h
  CC  ipc/shm.o
ipc/shm.c: In function ‘shm_mmap’:
ipc/shm.c:590:34: error: ‘do_populate’ undeclared (first use in this function); 
did you mean ‘mm_populate’?
  ret = call_mmap(sfd->file, vma, do_populate);
  ^~~
  mm_populate
ipc/shm.c:590:34: note: each undeclared identifier is reported only once for 
each function it appears in
scripts/Makefile.build:288: recipe for target 'ipc/shm.o' failed
make[1]: *** [ipc/shm.o] Error 1
Makefile:1831: recipe for target 'ipc' failed
make: *** [ipc] Error 2




[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/gtt: reduce overzealous alignment constraints for GGTT (rev3)

2022-03-07 Thread Patchwork
== Series Details ==

Series: drm/i915/gtt: reduce overzealous alignment constraints for GGTT (rev3)
URL   : https://patchwork.freedesktop.org/series/100991/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11332 -> Patchwork_22497


Summary
---

  **SUCCESS**

  No regressions found.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/index.html

Participating hosts (44 -> 38)
--

  Additional (1): fi-tgl-1115g4 
  Missing(7): fi-kbl-soraka shard-tglu shard-rkl fi-bsw-cyan bat-rpls-2 
shard-dg1 fi-bdw-samus 

Known issues


  Here are the changes found in Patchwork_22497 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@core_hotunplug@unbind-rebind:
- fi-bwr-2160:[PASS][1] -> [FAIL][2] ([i915#3194])
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-bwr-2160/igt@core_hotunp...@unbind-rebind.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-bwr-2160/igt@core_hotunp...@unbind-rebind.html

  * igt@gem_exec_suspend@basic-s3@smem:
- fi-bdw-5557u:   [PASS][3] -> [INCOMPLETE][4] ([i915#146])
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-bdw-5557u/igt@gem_exec_suspend@basic...@smem.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-bdw-5557u/igt@gem_exec_suspend@basic...@smem.html

  * igt@gem_huc_copy@huc-copy:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][5] ([i915#2190])
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@gem_huc_c...@huc-copy.html

  * igt@gem_lmem_swapping@basic:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][6] ([i915#4613]) +3 similar issues
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@gem_lmem_swapp...@basic.html

  * igt@i915_pm_backlight@basic-brightness:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][7] ([i915#1155])
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@i915_pm_backli...@basic-brightness.html

  * igt@i915_pm_rpm@module-reload:
- fi-tgl-1115g4:  NOTRUN -> [INCOMPLETE][8] ([i915#1385] / [i915#62])
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@i915_pm_...@module-reload.html

  * igt@i915_selftest@live:
- fi-skl-6600u:   NOTRUN -> [FAIL][9] ([i915#4547])
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-skl-6600u/igt@i915_selft...@live.html

  * igt@kms_chamelium@vga-hpd-fast:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][10] ([fdo#111827]) +8 similar issues
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@kms_chamel...@vga-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][11] ([i915#4103]) +1 similar issue
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_flip@basic-flip-vs-modeset@c-dp3:
- fi-tgl-1115g4:  NOTRUN -> [DMESG-WARN][12] ([i915#4002]) +88 similar 
issues
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@kms_flip@basic-flip-vs-mode...@c-dp3.html

  * igt@kms_force_connector_basic@force-load-detect:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][13] ([fdo#109285])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@kms_force_connector_ba...@force-load-detect.html

  * igt@kms_psr@primary_mmap_gtt:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][14] ([fdo#110189]) +3 similar issues
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@kms_psr@primary_mmap_gtt.html

  * igt@prime_vgem@basic-userptr:
- fi-skl-6600u:   NOTRUN -> [SKIP][15] ([fdo#109271])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-skl-6600u/igt@prime_v...@basic-userptr.html
- fi-tgl-1115g4:  NOTRUN -> [SKIP][16] ([i915#3301])
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@prime_v...@basic-userptr.html

  * igt@runner@aborted:
- fi-tgl-1115g4:  NOTRUN -> [FAIL][17] ([i915#2722] / [i915#4312])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-1115g4/igt@run...@aborted.html

  
 Possible fixes 

  * igt@i915_selftest@live@gt_heartbeat:
- {fi-tgl-dsi}:   [DMESG-FAIL][18] ([i915#541]) -> [PASS][19]
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11332/fi-tgl-dsi/igt@i915_selftest@live@gt_heartbeat.html
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22497/fi-tgl-dsi/igt@i915_selftest@live@gt_heartbeat.html

  * igt@i915_selftest@live@hangcheck:
- bat-dg1-6:  [DMESG-FAIL][20] ([i915#4957]) -> [PASS][21]
   [20]: 

[Intel-gfx] [PATCH v3 6/6] drm/i915/migrate: Evict and restore the flatccs capable lmem obj

2022-03-07 Thread Ramalingam C
When we are swapping out the local memory obj on flat-ccs capable platform,
we need to capture the ccs data too along with main memory and we need to
restore it when we are swapping in the content.

When lmem object is swapped into a smem obj, smem obj will
have the extra pages required to hold the ccs data corresponding to the
lmem main memory. So main memory of lmem will be copied into the initial
pages of the smem and then ccs data corresponding to the main memory
will be copied to the subsequent pages of smem. ccs data is 1/256 of
lmem size.

Swapin happens exactly in reverse order. First main memory of lmem is
restored from the smem's initial pages and the ccs data will be restored
from the subsequent pages of smem.

Extracting and restoring the CCS data is done through a special cmd called
XY_CTRL_SURF_COPY_BLT

v2: Fixing the ccs handling
v3: Handle the ccs data at same loop as main memory [Thomas]

Signed-off-by: Ramalingam C 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  15 +
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 274 ++-
 2 files changed, 285 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 34cead49f35e..fa428a67620e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -153,8 +153,10 @@
 #define   MI_FLUSH_DW_PROTECTED_MEM_EN (1 << 22)
 #define   MI_FLUSH_DW_STORE_INDEX  (1<<21)
 #define   MI_INVALIDATE_TLB(1<<18)
+#define   MI_FLUSH_DW_CCS  (1<<16)
 #define   MI_FLUSH_DW_OP_STOREDW   (1<<14)
 #define   MI_FLUSH_DW_OP_MASK  (3<<14)
+#define   MI_FLUSH_DW_LLC  (1<<9)
 #define   MI_FLUSH_DW_NOTIFY   (1<<8)
 #define   MI_INVALIDATE_BSD(1<<7)
 #define   MI_FLUSH_DW_USE_GTT  (1<<2)
@@ -203,6 +205,19 @@
 #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
 #define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
 
+#define XY_CTRL_SURF_INSTR_SIZE5
+#define MI_FLUSH_DW_SIZE   3
+#define XY_CTRL_SURF_COPY_BLT  ((2 << 29) | (0x48 << 22) | 3)
+#define   SRC_ACCESS_TYPE_SHIFT21
+#define   DST_ACCESS_TYPE_SHIFT20
+#define   CCS_SIZE_MASKGENMASK(17, 8)
+#define   XY_CTRL_SURF_MOCS_MASK   GENMASK(31, 25)
+#define   NUM_CCS_BYTES_PER_BLOCK  256
+#define   NUM_BYTES_PER_CCS_BYTE   256
+#define   NUM_CCS_BLKS_PER_XFER1024
+#define   INDIRECT_ACCESS  0
+#define   DIRECT_ACCESS1
+
 #define COLOR_BLT_CMD  (2 << 29 | 0x40 << 22 | (5 - 2))
 #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
 #define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 24e0e73e4a90..6d2181725d76 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -497,6 +497,120 @@ static bool wa_1209644611_applies(int ver, u32 size)
  * location.
  */
 
+static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
+{
+   *cmd++ = MI_FLUSH_DW | flags;
+   *cmd++ = 0;
+   *cmd++ = 0;
+
+   return cmd;
+}
+
+static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size)
+{
+   u32 num_cmds, num_blks, total_size;
+
+   if (!GET_CCS_BYTES(i915, size))
+   return 0;
+
+   /*
+* XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte
+* blocks. one XY_CTRL_SURF_COPY_BLT command can
+* transfer upto 1024 blocks.
+*/
+   num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
+   NUM_CCS_BYTES_PER_BLOCK);
+   num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER);
+   total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds;
+
+   /*
+* Adding a flush before and after XY_CTRL_SURF_COPY_BLT
+*/
+   total_size += 2 * MI_FLUSH_DW_SIZE;
+
+   return total_size;
+}
+
+static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr,
+u8 src_mem_access, u8 dst_mem_access,
+int src_mocs, int dst_mocs,
+u32 ccs_blocks)
+{
+   /*
+* The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS
+* data in and out of the CCS region.
+*
+* We can copy at most 1024 blocks of 256 bytes using one
+* XY_CTRL_SURF_COPY_BLT instruction.
+*
+* In case we need to copy more than 1024 blocks, we need to add
+* another instruction to the same batch buffer.
+*
+* 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS.
+*
+* 256 KB of CCS represents 256 * 256 KB = 64 MB of LMEM.
+*/
+   do {
+   int 

[Intel-gfx] [PATCH v3 5/6] drm/i915/gt: Optimize the migration loop

2022-03-07 Thread Ramalingam C
Move the static calculations out of the loop.

Signed-off-by: Ramalingam C 
---
 drivers/gpu/drm/i915/gt/intel_migrate.c | 34 -
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 05262f1b438e..24e0e73e4a90 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -556,6 +556,7 @@ intel_context_migrate_copy(struct intel_context *ce,
   struct i915_request **out)
 {
struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst);
+   u32 src_offset, dst_offset;
struct i915_request *rq;
int err;
 
@@ -564,8 +565,20 @@ intel_context_migrate_copy(struct intel_context *ce,
 
GEM_BUG_ON(ce->ring->size < SZ_64K);
 
+   src_offset = 0;
+   dst_offset = CHUNK_SZ;
+   if (HAS_64K_PAGES(ce->engine->i915)) {
+   GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);
+
+   src_offset = 0;
+   dst_offset = 0;
+   if (src_is_lmem)
+   src_offset = CHUNK_SZ;
+   if (dst_is_lmem)
+   dst_offset = 2 * CHUNK_SZ;
+   }
+
do {
-   u32 src_offset, dst_offset;
int len;
 
rq = i915_request_create(ce);
@@ -593,19 +606,6 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
 
-   src_offset = 0;
-   dst_offset = CHUNK_SZ;
-   if (HAS_64K_PAGES(ce->engine->i915)) {
-   GEM_BUG_ON(!src_is_lmem && !dst_is_lmem);
-
-   src_offset = 0;
-   dst_offset = 0;
-   if (src_is_lmem)
-   src_offset = CHUNK_SZ;
-   if (dst_is_lmem)
-   dst_offset = 2 * CHUNK_SZ;
-   }
-
len = emit_pte(rq, _src, src_cache_level, src_is_lmem,
   src_offset, CHUNK_SZ);
if (len <= 0) {
@@ -615,12 +615,10 @@ intel_context_migrate_copy(struct intel_context *ce,
 
err = emit_pte(rq, _dst, dst_cache_level, dst_is_lmem,
   dst_offset, len);
-   if (err < 0)
-   goto out_rq;
-   if (err < len) {
+   if (err < len)
err = -EINVAL;
+   if (err < 0)
goto out_rq;
-   }
 
err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
if (err)
-- 
2.20.1



[Intel-gfx] [PATCH v3 4/6] drm/i915/gem: Add extra pages in ttm_tt for ccs data

2022-03-07 Thread Ramalingam C
On Xe-HP and later devices, dedicated compression control state (CCS)
stored in local memory is used for each surface, to support the
3D and media compression formats.

The memory required for the CCS of the entire local memory is 1/256 of
the local memory size. So before the kernel boot, the required memory
is reserved for the CCS data and a secure register will be programmed
with the CCS base address

So when an object is allocated in local memory, we don't need to explicitly
allocate the space for ccs data. But when the obj is evicted into the
smem, to hold the compression related data along with the obj extra space
is needed in smem. i.e obj_size + (obj_size/256).

Hence when a smem pages are allocated for an obj with lmem placement
possibility we create with the extra pages required for the ccs data for
the obj size.

v2:
  Used imperative wording [Thomas]

Signed-off-by: Ramalingam C 
cc: Christian Koenig 
cc: Hellstrom Thomas 
Reviewed-by: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 1a8262f5f692..c7a36861c38d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -20,6 +20,7 @@
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"
 #include "gem/i915_gem_ttm_pm.h"
+#include "gt/intel_gpu_commands.h"
 
 #define I915_TTM_PRIO_PURGE 0
 #define I915_TTM_PRIO_NO_PAGES  1
@@ -255,12 +256,27 @@ static const struct i915_refct_sgt_ops tt_rsgt_ops = {
.release = i915_ttm_tt_release
 };
 
+static inline bool
+i915_gem_object_has_lmem_placement(struct drm_i915_gem_object *obj)
+{
+   int i;
+
+   for (i = 0; i < obj->mm.n_placements; i++)
+   if (obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL)
+   return true;
+
+   return false;
+}
+
 static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 uint32_t page_flags)
 {
+   struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+bdev);
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, bo->resource->mem_type);
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+   unsigned long ccs_pages = 0;
enum ttm_caching caching;
struct i915_ttm_tt *i915_tt;
int ret;
@@ -283,7 +299,12 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
i915_tt->is_shmem = true;
}
 
-   ret = ttm_tt_init(_tt->ttm, bo, page_flags, caching, 0);
+   if (HAS_FLAT_CCS(i915) && i915_gem_object_has_lmem_placement(obj))
+   ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size,
+ NUM_BYTES_PER_CCS_BYTE),
+PAGE_SIZE);
+
+   ret = ttm_tt_init(_tt->ttm, bo, page_flags, caching, ccs_pages);
if (ret)
goto err_free;
 
-- 
2.20.1



[Intel-gfx] [PATCH v3 3/6] drm/ttm: Add a parameter to add extra pages into ttm_tt

2022-03-07 Thread Ramalingam C
Add a parameter called "extra_pages" for ttm_tt_init, to indicate that
driver needs extra pages in ttm_tt.

v2:
  Used imperative wording [Thomas and Christian]

Signed-off-by: Ramalingam C 
cc: Christian Koenig 
cc: Hellstrom Thomas 
Reviewed-by: Thomas Hellstrom 
Reviewed-by: Christian Konig 
---
 drivers/gpu/drm/drm_gem_vram_helper.c  |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c|  2 +-
 drivers/gpu/drm/qxl/qxl_ttm.c  |  2 +-
 drivers/gpu/drm/ttm/ttm_agp_backend.c  |  2 +-
 drivers/gpu/drm/ttm/ttm_tt.c   | 12 +++-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  2 +-
 include/drm/ttm/ttm_tt.h   |  4 +++-
 7 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c 
b/drivers/gpu/drm/drm_gem_vram_helper.c
index dc7f938bfff2..123045b58fec 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -867,7 +867,7 @@ static struct ttm_tt *bo_driver_ttm_tt_create(struct 
ttm_buffer_object *bo,
if (!tt)
return NULL;
 
-   ret = ttm_tt_init(tt, bo, page_flags, ttm_cached);
+   ret = ttm_tt_init(tt, bo, page_flags, ttm_cached, 0);
if (ret < 0)
goto err_ttm_tt_init;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 45cc5837ce00..1a8262f5f692 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -283,7 +283,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct 
ttm_buffer_object *bo,
i915_tt->is_shmem = true;
}
 
-   ret = ttm_tt_init(_tt->ttm, bo, page_flags, caching);
+   ret = ttm_tt_init(_tt->ttm, bo, page_flags, caching, 0);
if (ret)
goto err_free;
 
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index b2e33d5ba5d0..52156b54498f 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -113,7 +113,7 @@ static struct ttm_tt *qxl_ttm_tt_create(struct 
ttm_buffer_object *bo,
ttm = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
if (ttm == NULL)
return NULL;
-   if (ttm_tt_init(ttm, bo, page_flags, ttm_cached)) {
+   if (ttm_tt_init(ttm, bo, page_flags, ttm_cached, 0)) {
kfree(ttm);
return NULL;
}
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c 
b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index 6ddc16f0fe2b..d27691f2e451 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -134,7 +134,7 @@ struct ttm_tt *ttm_agp_tt_create(struct ttm_buffer_object 
*bo,
agp_be->mem = NULL;
agp_be->bridge = bridge;
 
-   if (ttm_tt_init(_be->ttm, bo, page_flags, ttm_write_combined)) {
+   if (ttm_tt_init(_be->ttm, bo, page_flags, ttm_write_combined, 0)) {
kfree(agp_be);
return NULL;
}
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index d234aab800a0..1a66d9fc589a 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -134,9 +134,10 @@ void ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt 
*ttm)
 static void ttm_tt_init_fields(struct ttm_tt *ttm,
   struct ttm_buffer_object *bo,
   uint32_t page_flags,
-  enum ttm_caching caching)
+  enum ttm_caching caching,
+  unsigned long extra_pages)
 {
-   ttm->num_pages = PAGE_ALIGN(bo->base.size) >> PAGE_SHIFT;
+   ttm->num_pages = (PAGE_ALIGN(bo->base.size) >> PAGE_SHIFT) + 
extra_pages;
ttm->caching = ttm_cached;
ttm->page_flags = page_flags;
ttm->dma_address = NULL;
@@ -146,9 +147,10 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm,
 }
 
 int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
-   uint32_t page_flags, enum ttm_caching caching)
+   uint32_t page_flags, enum ttm_caching caching,
+   unsigned long extra_pages)
 {
-   ttm_tt_init_fields(ttm, bo, page_flags, caching);
+   ttm_tt_init_fields(ttm, bo, page_flags, caching, extra_pages);
 
if (ttm_tt_alloc_page_directory(ttm)) {
pr_err("Failed allocating page table\n");
@@ -180,7 +182,7 @@ int ttm_sg_tt_init(struct ttm_tt *ttm, struct 
ttm_buffer_object *bo,
 {
int ret;
 
-   ttm_tt_init_fields(ttm, bo, page_flags, caching);
+   ttm_tt_init_fields(ttm, bo, page_flags, caching, 0);
 
if (page_flags & TTM_TT_FLAG_EXTERNAL)
ret = ttm_sg_tt_alloc_page_directory(ttm);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index b84ecc6d6611..4e3938e62c08 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ 

[Intel-gfx] [PATCH v3 2/6] drm/i915/gt: Clear compress metadata for Flat-ccs objects

2022-03-07 Thread Ramalingam C
Xe-HP and later devices support Flat CCS, which reserves a portion of
the device memory to store compression metadata, during the clearing of
device memory buffer object we also need to clear the associated
CCS buffer.

XY_FAST_COLOR_BLT cmd provides a option to clear the ccs metadata
corresponding to the main memory that is cleared. So on Flat-CCS capable
platform we use this option to clear the CCS meta data along with main
memory.

v2: Fixed issues with platform naming [Lucas]
v3: Rebased [Ram]
Used the round_up funcs [Bob]
v4: Fixed ccs blk calculation [Ram]
Added Kdoc on flat-ccs.
v5: GENMASK is used [Matt]
mocs fix [Matt]
Comments Fix [Matt]
Flush address programming [Ram]
v6: FLUSH_DW is fixed
Few coding style fix
v7: Adopting the XY_FAST_COLOR_BLT (Thomas]

Signed-off-by: Ramalingam C 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  3 ++
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 39 ++--
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 925e55b6a94f..34cead49f35e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -207,8 +207,11 @@
 #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
 #define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
 #define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
+#define   FAST_CLEAR_0 (2 << 12)
 #define   XY_FAST_COLOR_BLT_DW 16
 #define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
+#define   XY_FAST_COLOR_BLT_AUX_MASK   GENMASK(20, 18)
+#define   XY_FAST_COLOR_BLT_AUX_CCS_E  5
 #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
 #define GEN9_XY_FAST_COPY_BLT_CMD  (2 << 29 | 0x42 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index cb68f7bf6b28..05262f1b438e 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -469,6 +469,34 @@ static bool wa_1209644611_applies(int ver, u32 size)
return height % 4 == 3 && height <= 8;
 }
 
+/**
+ * DOC: Flat-CCS - Memory compression for Local memory
+ *
+ * On Xe-HP and later devices, we use dedicated compression control state (CCS)
+ * stored in local memory for each surface, to support the 3D and media
+ * compression formats.
+ *
+ * The memory required for the CCS of the entire local memory is 1/256 of the
+ * local memory size. So before the kernel boot, the required memory is 
reserved
+ * for the CCS data and a secure register will be programmed with the CCS base
+ * address.
+ *
+ * Flat CCS data needs to be cleared when a lmem object is allocated.
+ * And CCS data can be copied in and out of CCS region through
+ * XY_CTRL_SURF_COPY_BLT. CPU can't access the CCS data directly.
+ *
+ * When we exhaust the lmem, if the object's placements support smem, then we 
can
+ * directly decompress the compressed lmem object into smem and start using it
+ * from smem itself.
+ *
+ * But when we need to swapout the compressed lmem object into a smem region
+ * though objects' placement doesn't support smem, then we copy the lmem 
content
+ * as it is into smem region along with ccs data (using XY_CTRL_SURF_COPY_BLT).
+ * When the object is referred, lmem content will be swaped in along with
+ * restoration of the CCS data (using XY_CTRL_SURF_COPY_BLT) at corresponding
+ * location.
+ */
+
 static int emit_copy(struct i915_request *rq,
 u32 dst_offset, u32 src_offset, int size)
 {
@@ -621,8 +649,8 @@ static int emit_clear(struct i915_request *rq, u64 offset, 
int size,
 {
struct drm_i915_private *i915 = rq->engine->i915;
int mocs = rq->engine->gt->mocs.uc_index << 1;
+   u32 *cs, spl_mode = 0, aux = 0, mem_type = 0;
const int ver = GRAPHICS_VER(i915);
-   u32 *cs, mem_type = 0;
int ring_sz;
 
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
@@ -644,10 +672,15 @@ static int emit_clear(struct i915_request *rq, u64 
offset, int size,
return PTR_ERR(cs);
 
if (ver >= 12) {
+   if (HAS_FLAT_CCS(i915)) {
+   spl_mode = FAST_CLEAR_0;
+   aux = FIELD_PREP(XY_FAST_COLOR_BLT_AUX_MASK,
+XY_FAST_COLOR_BLT_AUX_CCS_E);
+   }
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
-   (XY_FAST_COLOR_BLT_DW - 2);
+   spl_mode | (XY_FAST_COLOR_BLT_DW - 2);
*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
-   (PAGE_SIZE - 1);
+   (PAGE_SIZE - 1) | aux;
*cs++ = 0;
*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
*cs++ = lower_32_bits(offset);

[Intel-gfx] [PATCH v3 1/6] drm/i915/gt: Use XY_FAST_COLOR_BLT to clear obj on graphics ver 12+

2022-03-07 Thread Ramalingam C
XY_FAST_COLOR_BLT cmd is faster than the older XY_COLOR_BLT. Hence for
clearing (Zero out) the pages of the newly allocated object, faster cmd
is used.

Signed-off-by: Ramalingam C 
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  5 ++
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 51 +---
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index d112ffd56418..925e55b6a94f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -205,6 +205,11 @@
 
 #define COLOR_BLT_CMD  (2 << 29 | 0x40 << 22 | (5 - 2))
 #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
+#define XY_FAST_COLOR_BLT_CMD  (2 << 29 | 0x44 << 22)
+#define   XY_FAST_COLOR_BLT_DEPTH_32   (2 << 19)
+#define   XY_FAST_COLOR_BLT_DW 16
+#define   XY_FAST_COLOR_BLT_MOCS_MASK  GENMASK(27, 21)
+#define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
 #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
 #define GEN9_XY_FAST_COPY_BLT_CMD  (2 << 29 | 0x42 << 22)
 #define XY_SRC_COPY_BLT_CMD(2 << 29 | 0x53 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 20444d6ceb3c..cb68f7bf6b28 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -16,6 +16,8 @@ struct insert_pte_data {
 };
 
 #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
+#define GET_CCS_BYTES(i915, size)  (HAS_FLAT_CCS(i915) ? \
+DIV_ROUND_UP(size, 
NUM_BYTES_PER_CCS_BYTE) : 0)
 
 static bool engine_supports_migration(struct intel_engine_cs *engine)
 {
@@ -614,20 +616,56 @@ intel_context_migrate_copy(struct intel_context *ce,
return err;
 }
 
-static int emit_clear(struct i915_request *rq, u64 offset, int size, u32 value)
+static int emit_clear(struct i915_request *rq, u64 offset, int size,
+ u32 value, bool is_lmem)
 {
-   const int ver = GRAPHICS_VER(rq->engine->i915);
-   u32 *cs;
+   struct drm_i915_private *i915 = rq->engine->i915;
+   int mocs = rq->engine->gt->mocs.uc_index << 1;
+   const int ver = GRAPHICS_VER(i915);
+   u32 *cs, mem_type = 0;
+   int ring_sz;
 
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
 
offset += (u64)rq->engine->instance << 32;
 
-   cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
+   if (ver >= 12)
+   ring_sz = 16;
+   else if (ver >= 8)
+   ring_sz = 8;
+   else
+   ring_sz = 6;
+
+   if (!is_lmem)
+   mem_type = 1 << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
+
+   cs = intel_ring_begin(rq, ring_sz);
if (IS_ERR(cs))
return PTR_ERR(cs);
 
-   if (ver >= 8) {
+   if (ver >= 12) {
+   *cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
+   (XY_FAST_COLOR_BLT_DW - 2);
+   *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
+   (PAGE_SIZE - 1);
+   *cs++ = 0;
+   *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+   *cs++ = lower_32_bits(offset);
+   *cs++ = upper_32_bits(offset);
+   *cs++ = mem_type;
+   /* BG7 */
+   *cs++ = value;
+   *cs++ = 0;
+   *cs++ = 0;
+   *cs++ = 0;
+   /* BG11 */
+   *cs++ = 0;
+   *cs++ = 0;
+   /* BG13 */
+   *cs++ = 0;
+   *cs++ = 0;
+   *cs++ = 0;
+   } else if (ver >= 8) {
*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
*cs++ = 0;
@@ -645,7 +683,6 @@ static int emit_clear(struct i915_request *rq, u64 offset, 
int size, u32 value)
*cs++ = lower_32_bits(offset);
*cs++ = value;
}
-
intel_ring_advance(rq, cs);
return 0;
 }
@@ -711,7 +748,7 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
 
-   err = emit_clear(rq, offset, len, value);
+   err = emit_clear(rq, offset, len, value, is_lmem);
 
/* Arbitration is re-enabled between requests. */
 out_rq:
-- 
2.20.1



[Intel-gfx] [PATCH v3 0/6] drm/i915/ttm: Evict and restore of compressed object

2022-03-07 Thread Ramalingam C
On Xe-HP and later devices, we use dedicated compression control
state (CCS) stored in local memory for each surface, to support
the 3D and media compression formats.

The memory required for the CCS of the entire local memory is
1/256 of the local memory size. So before the kernel
boot, the required memory is reserved for the CCS data and a
secure register will be programmed with the CCS base address

So when we allocate an object in local memory we don't need to explicitly
allocate the space for ccs data. But when we evict the obj into the smem
to hold the compression related data along with the obj we need smem
space of obj_size + (obj_size/256).

Hence when we create smem for an obj with lmem placement possibility we
create with the extra space.

When we are swapping out the local memory obj on flat-ccs capable platform,
we need to capture the ccs data too along with main memory and we need to
restore it when we are swapping in the content.

When lmem object is swapped into a smem obj, smem obj will
have the extra pages required to hold the ccs data corresponding to the
lmem main memory. So main memory of lmem will be copied into the initial
pages of the smem and then ccs data corresponding to the main memory
will be copied to the subsequent pages of smem.

Swapin happens exactly in reverse order. First main memory of lmem is
restored from the smem's initial pages and the ccs data will be restored
from the subsequent pages of smem.

Extracting and restoring the CCS data is done through a special cmd called
XY_CTRL_SURF_COPY_BLT

v3:
  Fast_Clear_0 is used for clearing the ccs data on obj allocation [Thomas]
  Migration of main memory and ccs data are done in single request [Thomas]
  Small optimization patch is added for the migration loop
  CCS clearing is split into two patches.

Test-with: 20220307121042.23287-4-ramalinga...@intel.com

Ramalingam C (6):
  drm/i915/gt: Use XY_FAST_COLOR_BLT to clear obj on graphics ver 12+
  drm/i915/gt: Clear compress metadata for Flat-ccs objects
  drm/ttm: Add a parameter to add extra pages into ttm_tt
  drm/i915/gem: Add extra pages in ttm_tt for ccs data
  drm/i915/gt: Optimize the migration loop
  drm/i915/migrate: Evict and restore the flatccs capable lmem obj

 drivers/gpu/drm/drm_gem_vram_helper.c|   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  |  23 +-
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  23 ++
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 390 +--
 drivers/gpu/drm/qxl/qxl_ttm.c|   2 +-
 drivers/gpu/drm/ttm/ttm_agp_backend.c|   2 +-
 drivers/gpu/drm/ttm/ttm_tt.c |  12 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c   |   2 +-
 include/drm/ttm/ttm_tt.h |   4 +-
 9 files changed, 421 insertions(+), 39 deletions(-)

-- 
2.20.1



Re: [Intel-gfx] [PATCH v3 i-g-t] lib/intel_mmio: Fix mmapped resources not unmapped on fini

2022-03-07 Thread Kamil Konieczny
Hi Janusz,

Dnia 2022-03-07 at 09:26:43 +0100, Janusz Krzysztofik napisał(a):
> Commit 5f3cfa485eb4 ("lib: Use safe wrappers around libpciaccess
> initialization functions") took care of not leaking memory allocated by
> pci_system_init() but didn't take care of users potentially attempting to
> reinitialize global data maintained by libpciaccess.  For example,
> intel_register_access_init() mmaps device's PCI BAR0 resource with
> pci_device_map_range() but intel_register_access_fini() doesn't unmap it
> and next call to intel_register_access_init() fails on attempt to mmap it
> again.
> 
> Fix it, and also provide intel_mmio_unmap_*() counterparts to public
> functions intel_mmio_use_pci_bar() and intel_mmio_use_dump_file().
> 
> v2: apply last minute fixes, cached but unfortunately not committed before
> sending
> v3: use .pci_device_id field content as an indicator of arg initialization
> via intel_register_access_init(),
>   - improve checks of argument initialization status,
>   - shorten warning messages (Kamil),
>   - don't fill .mmio_size field until initialization succeeds (Kamil)
> 
> Signed-off-by: Janusz Krzysztofik 
> Cc: Kamil Konieczny 
> ---
>  lib/intel_io.h   |  4 +++
>  lib/intel_mmio.c | 64 +---
>  2 files changed, 65 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/intel_io.h b/lib/intel_io.h
> index 1cfe4fb6b9..ea2649d9bc 100644
> --- a/lib/intel_io.h
> +++ b/lib/intel_io.h
> @@ -49,6 +49,8 @@ struct intel_register_map {
>  
>  struct intel_mmio_data {
>   void *igt_mmio;
> + size_t mmio_size;
> + struct pci_device *dev;
>   struct intel_register_map map;
>   uint32_t pci_device_id;
>   int key;
> @@ -57,7 +59,9 @@ struct intel_mmio_data {
>  
>  void intel_mmio_use_pci_bar(struct intel_mmio_data *mmio_data,
>   struct pci_device *pci_dev);
> +void intel_mmio_unmap_pci_bar(struct intel_mmio_data *mmio_data);
>  void intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, char *file);
> +void intel_mmio_unmap_dump_file(struct intel_mmio_data *mmio_data);
>  
>  int intel_register_access_init(struct intel_mmio_data *mmio_data,
>  struct pci_device *pci_dev, int safe, int fd);
> diff --git a/lib/intel_mmio.c b/lib/intel_mmio.c
> index 667a69f5aa..d6ce0ee3ea 100644
> --- a/lib/intel_mmio.c
> +++ b/lib/intel_mmio.c
> @@ -82,6 +82,8 @@ void *igt_global_mmio;
>   * Sets also up mmio_data->igt_mmio to point at the data contained
>   * in @file. This allows the same code to get reused for dumping and decoding
>   * from running hardware as from register dumps.
> + *
> + * Users are expected to call intel_mmio_unmap_dump_file() after use.
>   */
>  void
>  intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, char *file)
> @@ -99,11 +101,32 @@ intel_mmio_use_dump_file(struct intel_mmio_data 
> *mmio_data, char *file)
>   igt_fail_on_f(mmio_data->igt_mmio == MAP_FAILED,
> "Couldn't mmap %s\n", file);
>  
> + mmio_data->mmio_size = st.st_size;
>   igt_global_mmio = mmio_data->igt_mmio;
>  
>   close(fd);
>  }
>  
> +/**
> + * intel_mmio_unmap_dump_file:
> + * @mmio_data:  mmio structure for IO operations
> + *
> + * Unmaps a dump file mmapped with intel_mmio_use_dump_file()
> + */
> +void intel_mmio_unmap_dump_file(struct intel_mmio_data *mmio_data)
> +{
> + if (igt_warn_on_f(mmio_data->dev,
> +   "test bug: arg initialized with 
> intel_mmio_use_pci_bar()\n"))
> + return;

Please add a global description about this kind of errors, this
one is for using unmap when mmio was mmap-ed from other mmap
type.

> + if (igt_warn_on_f(!mmio_data->mmio_size,
> +   "test bug: arg not initialized\n"))
> + return;

Can we replace this with one check igt_global_mmio != NULL ?
Something like:

if (igt_warn_on_f(!igt_global_mmio,
  "mmio regs not mmap-ed\n"))
return;

Or should we add this before all other checks in unmap functions
and keep this additional check ?

> +
> + igt_global_mmio = NULL;
> + igt_debug_on(munmap(mmio_data->igt_mmio, mmio_data->mmio_size) < 0);
> + mmio_data->mmio_size = 0;
> +}
> +
>  /**
>   * intel_mmio_use_pci_bar:
>   * @mmio_data:  mmio structure for IO operations
> @@ -112,6 +135,8 @@ intel_mmio_use_dump_file(struct intel_mmio_data 
> *mmio_data, char *file)
>   * Fill a mmio_data stucture with igt_mmio to point at the mmio bar.
>   *
>   * @pci_dev can be obtained from intel_get_pci_device().
> + *
> + * Users are expected to call intel_mmio_unmap_pci_bar() after use.
>   */
>  void
>  intel_mmio_use_pci_bar(struct intel_mmio_data *mmio_data, struct pci_device 
> *pci_dev)
> @@ -141,10 +166,34 @@ intel_mmio_use_pci_bar(struct intel_mmio_data 
> *mmio_data, struct pci_device *pci
> PCI_DEV_MAP_FLAG_WRITABLE,
>   

[Intel-gfx] [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread Jarkko Sakkinen
For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
to use that for initializing the device memory by providing a new callback
f_ops->populate() for the purpose.

SGX patches are provided to show the callback in context.

An obvious alternative is a ioctl but it is less elegant and requires
two syscalls (mmap + ioctl) per memory range, instead of just one
(mmap).

Jarkko Sakkinen (3):
  mm: Add f_ops->populate()
  x86/sgx: Export sgx_encl_page_alloc()
  x86/sgx: Implement EAUG population with MAP_POPULATE

 arch/mips/kernel/vdso.c|   2 +-
 arch/x86/kernel/cpu/sgx/driver.c   | 129 +
 arch/x86/kernel/cpu/sgx/encl.c |  38 ++
 arch/x86/kernel/cpu/sgx/encl.h |   3 +
 arch/x86/kernel/cpu/sgx/ioctl.c|  38 --
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |   2 +-
 fs/coda/file.c |   2 +-
 fs/overlayfs/file.c|   2 +-
 include/linux/fs.h |  12 +-
 include/linux/mm.h |   2 +-
 ipc/shm.c  |   2 +-
 mm/mmap.c  |  10 +-
 mm/nommu.c |   4 +-
 13 files changed, 193 insertions(+), 53 deletions(-)

-- 
2.35.1



[Intel-gfx] [PATCH RFC 3/3] x86/sgx: Implement EAUG population with MAP_POPULATE

2022-03-07 Thread Jarkko Sakkinen
With SGX1 an enclave needs to be created with its maximum memory demands
pre-allocated. Pages cannot be added to an enclave after it is initialized.
SGX2 introduces a new function, ENCLS[EAUG] for adding pages to an
initialized enclave.

Add support for dynamically adding pages to an initialized enclave with
mmap() by populating pages with EAUG. Use f_ops->populate() callback to
achieve this behaviour.

Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/kernel/cpu/sgx/driver.c | 129 +++
 1 file changed, 129 insertions(+)

diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index aa9b8b868867..0e97e7476076 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -9,6 +9,7 @@
 #include 
 #include "driver.h"
 #include "encl.h"
+#include "encls.h"
 
 u64 sgx_attributes_reserved_mask;
 u64 sgx_xfrm_reserved_mask = ~0x3;
@@ -101,6 +102,133 @@ static int sgx_mmap(struct file *file, struct 
vm_area_struct *vma)
return 0;
 }
 
+static int sgx_encl_augment_page(struct sgx_encl *encl, unsigned long offset)
+{
+   struct sgx_pageinfo pginfo = {0};
+   struct sgx_encl_page *encl_page;
+   struct sgx_epc_page *epc_page;
+   struct sgx_va_page *va_page;
+   u64 secinfo_flags;
+   int ret;
+
+   /*
+* Ignore internal permission checking for dynamically added pages.
+* They matter only for data added during the pre-initialization phase.
+* The enclave decides the permissions by the means of EACCEPT,
+* EACCEPTCOPY and EMODPE.
+*/
+   secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
+   encl_page = sgx_encl_page_alloc(encl, offset, secinfo_flags);
+   if (IS_ERR(encl_page))
+   return PTR_ERR(encl_page);
+
+   epc_page = sgx_alloc_epc_page(encl_page, true);
+   if (IS_ERR(epc_page)) {
+   ret = PTR_ERR(epc_page);
+   goto err_alloc_epc_page;
+   }
+
+   va_page = sgx_encl_grow(encl);
+   if (IS_ERR(va_page)) {
+   ret = PTR_ERR(va_page);
+   goto err_grow;
+   }
+
+   mutex_lock(>lock);
+
+   /*
+* Adding to encl->va_pages must be done under encl->lock.  Ditto for
+* deleting (via sgx_encl_shrink()) in the error path.
+*/
+   if (va_page)
+   list_add(_page->list, >va_pages);
+
+   /*
+* Insert prior to EADD in case of OOM.  EADD modifies MRENCLAVE, i.e.
+* can't be gracefully unwound, while failure on EADD/EXTEND is limited
+* to userspace errors (or kernel/hardware bugs).
+*/
+   ret = xa_insert(>page_array, PFN_DOWN(encl_page->desc),
+   encl_page, GFP_KERNEL);
+
+   /*
+* If ret == -EBUSY then page was created in another flow while
+* running without encl->lock
+*/
+   if (ret)
+   goto err_xa_insert;
+
+   pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
+   pginfo.addr = encl_page->desc & PAGE_MASK;
+   pginfo.metadata = 0;
+
+   ret = __eaug(, sgx_get_epc_virt_addr(epc_page));
+   if (ret)
+   goto err_eaug;
+
+   encl_page->encl = encl;
+   encl_page->epc_page = epc_page;
+   encl_page->type = SGX_PAGE_TYPE_REG;
+   encl->secs_child_cnt++;
+
+   sgx_mark_page_reclaimable(encl_page->epc_page);
+
+   mutex_unlock(>lock);
+
+   return 0;
+
+err_eaug:
+   xa_erase(>page_array, PFN_DOWN(encl_page->desc));
+
+err_xa_insert:
+   sgx_encl_shrink(encl, va_page);
+   mutex_unlock(>lock);
+
+err_grow:
+   sgx_encl_free_epc_page(epc_page);
+
+err_alloc_epc_page:
+   kfree(encl_page);
+
+   return VM_FAULT_SIGBUS;
+}
+
+/*
+ * Add new pages to the enclave sequentially with ENCLS[EAUG]. Note that
+ * sgx_mmap() validates that the given VMA is within the enclave range. Calling
+ * sgx_encl_may_map() here a second time would be too time consuming.
+ */
+static int sgx_populate(struct file *file, struct vm_area_struct *vma)
+{
+   unsigned long length = vma->vm_end - vma->vm_start;
+   struct sgx_encl *encl = file->private_data;
+   unsigned long start = encl->base - vma->vm_start;
+   unsigned long pos;
+   int ret;
+
+   /* EAUG works only for initialized enclaves. */
+   if (!test_bit(SGX_ENCL_INITIALIZED, >flags))
+   return -EINVAL;
+
+   for (pos = 0 ; pos < length; pos += PAGE_SIZE) {
+   if (signal_pending(current)) {
+   if (!pos)
+   ret = -ERESTARTSYS;
+
+   break;
+   }
+
+   if (need_resched())
+   cond_resched();
+
+   ret = sgx_encl_augment_page(encl, start + pos);
+   if (ret)
+   break;
+   }
+
+   return ret;
+}
+
 static unsigned long sgx_get_unmapped_area(struct file *file,
 

Re: [Intel-gfx] [PATCH RFC] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 02:57:55AM +, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > initialize the device memory in some specific manner. SGX driver can use
> > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > page in the address range.
> > 
> > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > it conditionally called inside call_mmap(). Update call sites
> > accodingly.
> 
> Your device driver has a ->mmap operation.  Why does it need another
> one?  More explanation required here.

f_ops->mmap() would require an additional parameter, which results
in heavy refactoring.

struct file_operations has 1125 references in the kernel tree, so I
decided to check this way around first. 

BR, Jarkko


[Intel-gfx] [PATCH RFC] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. SGX driver can use
this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accordingly.

Signed-off-by: Jarkko Sakkinen 
---
 arch/mips/kernel/vdso.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c |  2 +-
 fs/overlayfs/file.c|  2 +-
 include/linux/fs.h | 10 --
 include/linux/mm.h |  2 +-
 ipc/shm.c  |  2 +-
 mm/mmap.c  | 10 +-
 mm/nommu.c |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-   0, NULL);
+   0, NULL, false);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
if (!obj->base.filp)
return -ENODEV;
 
-   ret = call_mmap(obj->base.filp, vma);
+   ret = call_mmap(obj->base.filp, vma, false);
if (ret)
return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
vm_area_struct *vma)
spin_unlock(>c_lock);
 
vma->vm_file = get_file(host_file);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
 
if (ret) {
/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
vm_area_struct *vma)
vma_set_file(vma, realfile);
 
old_cred = ovl_override_creds(file_inode(file)->i_sb);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
revert_creds(old_cred);
ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..fb90284e1c82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+   int (*populate)(struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, 
struct kiocb *kio,
return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
bool do_populate)
 {
-   return file->f_op->mmap(file, vma);
+   int ret = file->f_op->mmap(file, vma);
+
+   if (!ret && do_populate)
+   ret = file->f_op->populate(file, vma);
+
+   return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, 
unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-   struct list_head *uf);
+   struct list_head *uf, bool do_populate);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,

Re: [Intel-gfx] [PATCH RFC] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 04:19:26AM +, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 06:11:21AM +0200, Jarkko Sakkinen wrote:
> > On Sun, Mar 06, 2022 at 03:52:12AM +, Matthew Wilcox wrote:
> > > On Sun, Mar 06, 2022 at 05:21:11AM +0200, Jarkko Sakkinen wrote:
> > > > On Sun, Mar 06, 2022 at 02:57:55AM +, Matthew Wilcox wrote:
> > > > > On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> > > > > > Sometimes you might want to use MAP_POPULATE to ask a device driver 
> > > > > > to
> > > > > > initialize the device memory in some specific manner. SGX driver 
> > > > > > can use
> > > > > > this to request more memory by issuing ENCLS[EAUG] x86 opcode for 
> > > > > > each
> > > > > > page in the address range.
> > > > > > 
> > > > > > Add f_ops->populate() with the same parameters as f_ops->mmap() and 
> > > > > > make
> > > > > > it conditionally called inside call_mmap(). Update call sites
> > > > > > accodingly.
> > > > > 
> > > > > Your device driver has a ->mmap operation.  Why does it need another
> > > > > one?  More explanation required here.
> > > > 
> > > > f_ops->mmap() would require an additional parameter, which results
> > > > heavy refactoring.
> > > > 
> > > > struct file_operations has 1125 references in the kernel tree, so I
> > > > decided to check this way around first. 
> > > 
> > > Are you saying that your device driver behaves differently if
> > > MAP_POPULATE is set versus if it isn't?  That seems hideously broken.
> > 
> > MAP_POPULATE does not do anything (according to __mm_populate in mm/gup.c)
> > with VMA's that have some sort of device/IO memory, i.e. vm_flags
> > intersecting with VM_PFNMAP | VM_IO.
> > 
> > I can extend the guard obviously to:
> > 
> > if (!ret && do_populate && file->f_op->populate &&
> > !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
> > file->f_op->populate(file, vma);
> 
> Are you deliberately avoiding the question?  I'm not asking about
> implementation.  I'm asking about the semantics of MAP_POPULATE with
> your driver.

No. I just noticed a bug in the guard from your comment that I wanted
to point out.

With the next version I post the corresponding change to the driver,
in order to see this in context.

BR, Jarkko


[Intel-gfx] [PATCH RFC 2/3] x86/sgx: Export sgx_encl_page_alloc()

2022-03-07 Thread Jarkko Sakkinen
Move sgx_encl_page_alloc() to encl.c and export it so that it can be
used in the implementation for MAP_POPULATE, which requires to allocate
new enclave pages.

Signed-off-by: Jarkko Sakkinen 
---
 arch/x86/kernel/cpu/sgx/encl.c  | 38 +
 arch/x86/kernel/cpu/sgx/encl.h  |  3 +++
 arch/x86/kernel/cpu/sgx/ioctl.c | 38 -
 3 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 89aeed798ffb..79e39bd99c09 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -914,6 +914,44 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
return ret;
 }
 
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+ unsigned long offset,
+ u64 secinfo_flags)
+{
+   struct sgx_encl_page *encl_page;
+   unsigned long prot;
+
+   encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
+   if (!encl_page)
+   return ERR_PTR(-ENOMEM);
+
+   encl_page->desc = encl->base + offset;
+   encl_page->encl = encl;
+
+   prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
+  _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
+  _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
+
+   /*
+* TCS pages must always RW set for CPU access while the SECINFO
+* permissions are *always* zero - the CPU ignores the user provided
+* values and silently overwrites them with zero permissions.
+*/
+   if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
+   prot |= PROT_READ | PROT_WRITE;
+
+   /* Calculate maximum of the VM flags for the page. */
+   encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
+
+   /*
+* At time of allocation, the runtime protection bits are the same
+* as the maximum protection bits.
+*/
+   encl_page->vm_run_prot_bits = encl_page->vm_max_prot_bits;
+
+   return encl_page;
+}
+
 /**
  * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
  * @encl: the enclave
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 1b6ce1da7c92..3df0d3faf3a1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -113,6 +113,9 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned 
long page_index,
 void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
 int sgx_encl_test_and_clear_young(struct mm_struct *mm,
  struct sgx_encl_page *page);
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+ unsigned long offset,
+ u64 secinfo_flags);
 void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr);
 struct sgx_epc_page *sgx_alloc_va_page(void);
 unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index d8c3c07badb3..3e3ca27a6f72 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -169,44 +169,6 @@ static long sgx_ioc_enclave_create(struct sgx_encl *encl, 
void __user *arg)
return ret;
 }
 
-static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
-unsigned long offset,
-u64 secinfo_flags)
-{
-   struct sgx_encl_page *encl_page;
-   unsigned long prot;
-
-   encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
-   if (!encl_page)
-   return ERR_PTR(-ENOMEM);
-
-   encl_page->desc = encl->base + offset;
-   encl_page->encl = encl;
-
-   prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
-  _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
-  _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
-
-   /*
-* TCS pages must always RW set for CPU access while the SECINFO
-* permissions are *always* zero - the CPU ignores the user provided
-* values and silently overwrites them with zero permissions.
-*/
-   if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
-   prot |= PROT_READ | PROT_WRITE;
-
-   /* Calculate maximum of the VM flags for the page. */
-   encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
-
-   /*
-* At time of allocation, the runtime protection bits are the same
-* as the maximum protection bits.
-*/
-   encl_page->vm_run_prot_bits = encl_page->vm_max_prot_bits;
-
-   return encl_page;
-}
-
 static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
 {
u64 perm = secinfo->flags & 

[Intel-gfx] [PATCH RFC 1/3] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. SGX driver can use
this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accordingly.
---
Signed-off-by: Jarkko Sakkinen 
v3:
-   if (!ret && do_populate && file->f_op->populate)
+   if (!ret && do_populate && file->f_op->populate &&
+   !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
(reported by Matthew Wilcox)
v2:
-   if (!ret && do_populate)
+   if (!ret && do_populate && file->f_op->populate)
(reported by Jan Harkes)
---
 arch/mips/kernel/vdso.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c |  2 +-
 fs/overlayfs/file.c|  2 +-
 include/linux/fs.h | 12 ++--
 include/linux/mm.h |  2 +-
 ipc/shm.c  |  2 +-
 mm/mmap.c  | 10 +-
 mm/nommu.c |  4 ++--
 9 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-   0, NULL);
+   0, NULL, false);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
if (!obj->base.filp)
return -ENODEV;
 
-   ret = call_mmap(obj->base.filp, vma);
+   ret = call_mmap(obj->base.filp, vma, false);
if (ret)
return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
vm_area_struct *vma)
spin_unlock(>c_lock);
 
vma->vm_file = get_file(host_file);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
 
if (ret) {
/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
vm_area_struct *vma)
vma_set_file(vma, realfile);
 
old_cred = ovl_override_creds(file_inode(file)->i_sb);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
revert_creds(old_cred);
ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..2909e2d14af8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1993,6 +1994,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+   int (*populate)(struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2076,15 @@ static inline ssize_t call_write_iter(struct file *file, 
struct kiocb *kio,
return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
bool do_populate)
 {
-   return file->f_op->mmap(file, vma);
+   int ret = file->f_op->mmap(file, vma);
+
+   if (!ret && do_populate && file->f_op->populate &&
+   !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+   ret = file->f_op->populate(file, vma);
+
+   return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ 

Re: [Intel-gfx] [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread 'Jarkko Sakkinen'
On Sun, Mar 06, 2022 at 08:30:14AM +, David Laight wrote:
> From: Jarkko Sakkinen
> > Sent: 06 March 2022 05:32
> > 
> > For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
> > to use that for initializing the device memory by providing a new callback
> > f_ops->populate() for the purpose.
> > 
> > SGX patches are provided to show the callback in context.
> > 
> > An obvious alternative is a ioctl but it is less elegant and requires
> > two syscalls (mmap + ioctl) per memory range, instead of just one
> > (mmap).
> 
> Is this all about trying to stop the vm_operations_struct.fault()
> function being called?

In SGX, protected memory is actually encrypted normal memory with CPU access
control semantics (marked as reserved, e.g. in struct page's).

In SGX you need call ENCLS[EAUG] outside the protected memory to add new
pages to the protected memory. Then when CPU is executing inside this
protected memory, also known as enclaves, it commits the memory as part of
the enclave either with ENCLU[EACCEPT] or ENCLU[EACCEPTCOPY].

So the point is not to prevent page faults but to prepare the memory for
pending state so that the enclave can then accept them without round-trips,
and in some cases thus improve performance (in our case in enarx.dev
platform that we are developing).

In fact, #PF handler in SGX driver in the current SGX2 patch set also does
EAUG on-demand. Optimal is to have both routes available. And said, this
can be of course also implemented as ioctl.

BR, Jarkko


[Intel-gfx] [PATCH RFC] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. SGX driver can use
this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accordingly.

Signed-off-by: Jarkko Sakkinen 
---
 arch/mips/kernel/vdso.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c |  2 +-
 fs/overlayfs/file.c|  2 +-
 include/linux/fs.h | 10 --
 include/linux/mm.h |  2 +-
 ipc/shm.c  |  2 +-
 mm/mmap.c  | 10 +-
 mm/nommu.c |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-   0, NULL);
+   0, NULL, false);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
if (!obj->base.filp)
return -ENODEV;
 
-   ret = call_mmap(obj->base.filp, vma);
+   ret = call_mmap(obj->base.filp, vma, false);
if (ret)
return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
vm_area_struct *vma)
spin_unlock(>c_lock);
 
vma->vm_file = get_file(host_file);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
 
if (ret) {
/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
vm_area_struct *vma)
vma_set_file(vma, realfile);
 
old_cred = ovl_override_creds(file_inode(file)->i_sb);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
revert_creds(old_cred);
ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..fb90284e1c82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+   int (*populate)(struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, 
struct kiocb *kio,
return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
bool do_populate)
 {
-   return file->f_op->mmap(file, vma);
+   int ret = file->f_op->mmap(file, vma);
+
+   if (!ret && do_populate)
+   ret = file->f_op->populate(file, vma);
+
+   return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, 
unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-   struct list_head *uf);
+   struct list_head *uf, bool do_populate);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,

Re: [Intel-gfx] [PATCH RFC 1/3] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 11:01:36AM +0100, Greg Kroah-Hartman wrote:
> On Sun, Mar 06, 2022 at 07:32:05AM +0200, Jarkko Sakkinen wrote:
> > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > initialize the device memory in some specific manner. SGX driver can use
> > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > page in the address range.
> > 
> > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > it conditionally called inside call_mmap(). Update call sites
> > accodingly.
> > ---
> > Signed-off-by: Jarkko Sakkinen 
> > v3:
> > -   if (!ret && do_populate && file->f_op->populate)
> > +   if (!ret && do_populate && file->f_op->populate &&
> > +   !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
> > (reported by Matthew Wilcox)
> > v2:
> > -   if (!ret && do_populate)
> > +   if (!ret && do_populate && file->f_op->populate)
> > (reported by Jan Harkes)
> > ---
> >  arch/mips/kernel/vdso.c|  2 +-
> >  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
> >  fs/coda/file.c |  2 +-
> >  fs/overlayfs/file.c|  2 +-
> >  include/linux/fs.h | 12 ++--
> >  include/linux/mm.h |  2 +-
> >  ipc/shm.c  |  2 +-
> >  mm/mmap.c  | 10 +-
> >  mm/nommu.c |  4 ++--
> >  9 files changed, 23 insertions(+), 15 deletions(-)
> > 
> > diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
> > index 3d0cf471f2fe..89f3f3da9abd 100644
> > --- a/arch/mips/kernel/vdso.c
> > +++ b/arch/mips/kernel/vdso.c
> > @@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm 
> > *bprm, int uses_interp)
> > base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
> > VM_READ | VM_EXEC |
> > VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
> > -   0, NULL);
> > +   0, NULL, false);
> > if (IS_ERR_VALUE(base)) {
> > ret = base;
> > goto out;
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > index 1b526039a60d..4c71f64d6a79 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > @@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf 
> > *dma_buf, struct vm_area_struct *
> > if (!obj->base.filp)
> > return -ENODEV;
> >  
> > -   ret = call_mmap(obj->base.filp, vma);
> > +   ret = call_mmap(obj->base.filp, vma, false);
> > if (ret)
> > return ret;
> >  
> > diff --git a/fs/coda/file.c b/fs/coda/file.c
> > index 29dd87be2fb8..e14f312fdbf8 100644
> > --- a/fs/coda/file.c
> > +++ b/fs/coda/file.c
> > @@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
> > vm_area_struct *vma)
> > spin_unlock(>c_lock);
> >  
> > vma->vm_file = get_file(host_file);
> > -   ret = call_mmap(vma->vm_file, vma);
> > +   ret = call_mmap(vma->vm_file, vma, false);
> >  
> > if (ret) {
> > /* if call_mmap fails, our caller will put host_file so we
> > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > index fa125feed0ff..b963a9397e80 100644
> > --- a/fs/overlayfs/file.c
> > +++ b/fs/overlayfs/file.c
> > @@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
> > vm_area_struct *vma)
> > vma_set_file(vma, realfile);
> >  
> > old_cred = ovl_override_creds(file_inode(file)->i_sb);
> > -   ret = call_mmap(vma->vm_file, vma);
> > +   ret = call_mmap(vma->vm_file, vma, false);
> > revert_creds(old_cred);
> > ovl_file_accessed(file);
> >  
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index e2d892b201b0..2909e2d14af8 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -42,6 +42,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  
> >  #include 
> >  #include 
> > @@ -1993,6 +1994,7 @@ struct file_operations {
> > long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
> > long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
> > int (*mmap) (struct file *, struct vm_area_struct *);
> > +   int (*populate)(struct file *, struct vm_area_struct *);
> > unsigned long mmap_supported_flags;
> > int (*open) (struct inode *, struct file *);
> > int (*flush) (struct file *, fl_owner_t id);
> > @@ -2074,9 +2076,15 @@ static inline ssize_t call_write_iter(struct file 
> > *file, struct kiocb *kio,
> > return file->f_op->write_iter(kio, iter);
> >  }
> >  
> > -static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
> > +static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
> > bool do_populate)
> >  {
> > -   return 

Re: [Intel-gfx] [PATCH RFC] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 03:52:12AM +, Matthew Wilcox wrote:
> On Sun, Mar 06, 2022 at 05:21:11AM +0200, Jarkko Sakkinen wrote:
> > On Sun, Mar 06, 2022 at 02:57:55AM +, Matthew Wilcox wrote:
> > > On Sun, Mar 06, 2022 at 04:15:33AM +0200, Jarkko Sakkinen wrote:
> > > > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > > > initialize the device memory in some specific manner. SGX driver can use
> > > > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > > > page in the address range.
> > > > 
> > > > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > > > it conditionally called inside call_mmap(). Update call sites
> > > > accodingly.
> > > 
> > > Your device driver has a ->mmap operation.  Why does it need another
> > > one?  More explanation required here.
> > 
> > f_ops->mmap() would require an additional parameter, which results
> > heavy refactoring.
> > 
> > struct file_operations has 1125 references in the kernel tree, so I
> > decided to check this way around first. 
> 
> Are you saying that your device driver behaves differently if
> MAP_POPULATE is set versus if it isn't?  That seems hideously broken.

MAP_POPULATE does not do anything (according to __mm_populate in mm/gup.c)
with VMA's that have some sort of device/IO memory, i.e. vm_flags
intersecting with VM_PFNMAP | VM_IO.

I can extend the guard obviously to:

if (!ret && do_populate && file->f_op->populate &&
!!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
file->f_op->populate(file, vma);

BR, Jarkko


Re: [Intel-gfx] [PATCH RFC 1/3] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
On Sun, Mar 06, 2022 at 07:03:00PM +0200, Jarkko Sakkinen wrote:
> On Sun, Mar 06, 2022 at 11:01:36AM +0100, Greg Kroah-Hartman wrote:
> > On Sun, Mar 06, 2022 at 07:32:05AM +0200, Jarkko Sakkinen wrote:
> > > Sometimes you might want to use MAP_POPULATE to ask a device driver to
> > > initialize the device memory in some specific manner. SGX driver can use
> > > this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
> > > page in the address range.
> > > 
> > > Add f_ops->populate() with the same parameters as f_ops->mmap() and make
> > > it conditionally called inside call_mmap(). Update call sites
> > > accordingly.
> > > ---
> > > Signed-off-by: Jarkko Sakkinen 
> > > v3:
> > > -   if (!ret && do_populate && file->f_op->populate)
> > > +   if (!ret && do_populate && file->f_op->populate &&
> > > +   !!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
> > > (reported by Matthew Wilcox)
> > > v2:
> > > -   if (!ret && do_populate)
> > > +   if (!ret && do_populate && file->f_op->populate)
> > > (reported by Jan Harkes)
> > > ---
> > >  arch/mips/kernel/vdso.c|  2 +-
> > >  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
> > >  fs/coda/file.c |  2 +-
> > >  fs/overlayfs/file.c|  2 +-
> > >  include/linux/fs.h | 12 ++--
> > >  include/linux/mm.h |  2 +-
> > >  ipc/shm.c  |  2 +-
> > >  mm/mmap.c  | 10 +-
> > >  mm/nommu.c |  4 ++--
> > >  9 files changed, 23 insertions(+), 15 deletions(-)
> > > 
> > > diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
> > > index 3d0cf471f2fe..89f3f3da9abd 100644
> > > --- a/arch/mips/kernel/vdso.c
> > > +++ b/arch/mips/kernel/vdso.c
> > > @@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm 
> > > *bprm, int uses_interp)
> > >   base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
> > >   VM_READ | VM_EXEC |
> > >   VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
> > > - 0, NULL);
> > > + 0, NULL, false);
> > >   if (IS_ERR_VALUE(base)) {
> > >   ret = base;
> > >   goto out;
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
> > > b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > index 1b526039a60d..4c71f64d6a79 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> > > @@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf 
> > > *dma_buf, struct vm_area_struct *
> > >   if (!obj->base.filp)
> > >   return -ENODEV;
> > >  
> > > - ret = call_mmap(obj->base.filp, vma);
> > > + ret = call_mmap(obj->base.filp, vma, false);
> > >   if (ret)
> > >   return ret;
> > >  
> > > diff --git a/fs/coda/file.c b/fs/coda/file.c
> > > index 29dd87be2fb8..e14f312fdbf8 100644
> > > --- a/fs/coda/file.c
> > > +++ b/fs/coda/file.c
> > > @@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
> > > vm_area_struct *vma)
> > >   spin_unlock(>c_lock);
> > >  
> > >   vma->vm_file = get_file(host_file);
> > > - ret = call_mmap(vma->vm_file, vma);
> > > + ret = call_mmap(vma->vm_file, vma, false);
> > >  
> > >   if (ret) {
> > >   /* if call_mmap fails, our caller will put host_file so we
> > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > index fa125feed0ff..b963a9397e80 100644
> > > --- a/fs/overlayfs/file.c
> > > +++ b/fs/overlayfs/file.c
> > > @@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
> > > vm_area_struct *vma)
> > >   vma_set_file(vma, realfile);
> > >  
> > >   old_cred = ovl_override_creds(file_inode(file)->i_sb);
> > > - ret = call_mmap(vma->vm_file, vma);
> > > + ret = call_mmap(vma->vm_file, vma, false);
> > >   revert_creds(old_cred);
> > >   ovl_file_accessed(file);
> > >  
> > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > index e2d892b201b0..2909e2d14af8 100644
> > > --- a/include/linux/fs.h
> > > +++ b/include/linux/fs.h
> > > @@ -42,6 +42,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  
> > >  #include 
> > >  #include 
> > > @@ -1993,6 +1994,7 @@ struct file_operations {
> > >   long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
> > >   long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
> > >   int (*mmap) (struct file *, struct vm_area_struct *);
> > > + int (*populate)(struct file *, struct vm_area_struct *);
> > >   unsigned long mmap_supported_flags;
> > >   int (*open) (struct inode *, struct file *);
> > >   int (*flush) (struct file *, fl_owner_t id);
> > > @@ -2074,9 +2076,15 @@ static inline ssize_t call_write_iter(struct file 
> > > *file, struct kiocb *kio,
> > >   return file->f_op->write_iter(kio, iter);
> > 

[Intel-gfx] [PATCH RFC v2] mm: Add f_ops->populate()

2022-03-07 Thread Jarkko Sakkinen
Sometimes you might want to use MAP_POPULATE to ask a device driver to
initialize the device memory in some specific manner. SGX driver can use
this to request more memory by issuing ENCLS[EAUG] x86 opcode for each
page in the address range.

Add f_ops->populate() with the same parameters as f_ops->mmap() and make
it conditionally called inside call_mmap(). Update call sites
accordingly.

Signed-off-by: Jarkko Sakkinen 
---
v2:
-   if (!ret && do_populate)
+   if (!ret && do_populate && file->f_op->populate)
(reported by Jan Harkes)
---
 arch/mips/kernel/vdso.c|  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |  2 +-
 fs/coda/file.c |  2 +-
 fs/overlayfs/file.c|  2 +-
 include/linux/fs.h | 10 --
 include/linux/mm.h |  2 +-
 ipc/shm.c  |  2 +-
 mm/mmap.c  | 10 +-
 mm/nommu.c |  4 ++--
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 3d0cf471f2fe..89f3f3da9abd 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -102,7 +102,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, 
int uses_interp)
base = mmap_region(NULL, STACK_TOP, PAGE_SIZE,
VM_READ | VM_EXEC |
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-   0, NULL);
+   0, NULL, false);
if (IS_ERR_VALUE(base)) {
ret = base;
goto out;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1b526039a60d..4c71f64d6a79 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -107,7 +107,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
struct vm_area_struct *
if (!obj->base.filp)
return -ENODEV;
 
-   ret = call_mmap(obj->base.filp, vma);
+   ret = call_mmap(obj->base.filp, vma, false);
if (ret)
return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 29dd87be2fb8..e14f312fdbf8 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -173,7 +173,7 @@ coda_file_mmap(struct file *coda_file, struct 
vm_area_struct *vma)
> > > 	spin_unlock(&cii->c_lock);
 
vma->vm_file = get_file(host_file);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
 
if (ret) {
/* if call_mmap fails, our caller will put host_file so we
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..b963a9397e80 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -503,7 +503,7 @@ static int ovl_mmap(struct file *file, struct 
vm_area_struct *vma)
vma_set_file(vma, realfile);
 
old_cred = ovl_override_creds(file_inode(file)->i_sb);
-   ret = call_mmap(vma->vm_file, vma);
+   ret = call_mmap(vma->vm_file, vma, false);
revert_creds(old_cred);
ovl_file_accessed(file);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2d892b201b0..4c6a3339373d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1993,6 +1993,7 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+   int (*populate)(struct file *, struct vm_area_struct *);
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
@@ -2074,9 +2075,14 @@ static inline ssize_t call_write_iter(struct file *file, 
struct kiocb *kio,
return file->f_op->write_iter(kio, iter);
 }
 
-static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma, 
bool do_populate)
 {
-   return file->f_op->mmap(file, vma);
+   int ret = file->f_op->mmap(file, vma);
+
+   if (!ret && do_populate && file->f_op->populate)
+   ret = file->f_op->populate(file, vma);
+
+   return ret;
 }
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..6c8c036f423b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2683,7 +2683,7 @@ extern unsigned long get_unmapped_area(struct file *, 
unsigned long, unsigned lo
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-   struct list_head *uf);
+   struct list_head *uf, bool do_populate);
 

Re: [Intel-gfx] [PATCH 7/8] drm/i915: fixup the initial fb base on DG1

2022-03-07 Thread Matthew Auld

On 04/03/2022 19:33, Ville Syrjälä wrote:

On Fri, Mar 04, 2022 at 05:23:32PM +, Matthew Auld wrote:

The offset we get looks to be the exact start of DSM, but the
initial_plane_vma expects the address to be relative.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
  .../drm/i915/display/intel_plane_initial.c| 22 +++
  1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c 
b/drivers/gpu/drm/i915/display/intel_plane_initial.c
index f797fcef18fc..b39d3a8dfe45 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -56,10 +56,24 @@ initial_plane_vma(struct drm_i915_private *i915,
if (!mem || plane_config->size == 0)
return NULL;
  
-	base = round_down(plane_config->base,

- I915_GTT_MIN_ALIGNMENT);
-   size = round_up(plane_config->base + plane_config->size,
-   mem->min_page_size);
+   base = plane_config->base;
+   if (IS_DGFX(i915)) {
+   /*
+* On discrete the base address should be somewhere in LMEM, but
+* depending on the size of LMEM the base address might
+* intersect with the start of DSM, like on DG1, in which case
+* we need the relative address. In such cases we might also
+* need to choose between inital fb vs fbc, if space is limited.
+*
+* On future discrete HW, like DG2, we should be able to just
+* allocate directly from LMEM, due to larger LMEM size.
+*/
+   if (base >= i915->dsm.start)
+   base -= i915->dsm.start;


Subsequent code expects the object to actually be inside stolen.
If that is not the case we should just give up.


Thanks for taking a look at this. Is that subsequent code outside 
initial_plane_vma()? In the next patch this is now using LMEM directly 
for dg2. Would that blow up somewhere else?




The fact that we fail to confirm any of that on integrated
parts has always bugged me, but not enough to actually do
anything about it. Such a check would be somewhat more involved
since we'd have to look at the PTEs. But on discrete sounds like
we can get away with a trivial check.


Which PTEs? Is this for the below GGTT bind? I would have assumed that 
the create_at/for_preallocated would simply refuse to allocate the pages 
if the requested range is outside the regions usable range? Or maybe 
there is more going on behind the scenes here?





+   }
+
+   size = roundup(base + plane_config->size, mem->min_page_size);
+   base = round_down(base, I915_GTT_MIN_ALIGNMENT);
size -= base;
  
  	/*

--
2.34.1




Re: [Intel-gfx] [PATCH RFC 0/3] MAP_POPULATE for device memory

2022-03-07 Thread David Hildenbrand
On 06.03.22 06:32, Jarkko Sakkinen wrote:
> For device memory (aka VM_IO | VM_PFNMAP) MAP_POPULATE does nothing. Allow
> to use that for initializing the device memory by providing a new callback
> f_ops->populate() for the purpose.
> 
> SGX patches are provided to show the callback in context.
> 
> An obvious alternative is a ioctl but it is less elegant and requires
> two syscalls (mmap + ioctl) per memory range, instead of just one
> (mmap).

What about extending MADV_POPULATE_READ | MADV_POPULATE_WRITE to support
VM_IO | VM_PFNMAP (as well?) ?


-- 
Thanks,

David / dhildenb



Re: [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/mm: Add an iterator to optimally walk over holes suitable for an allocation

2022-03-07 Thread Tvrtko Ursulin




On 06/03/2022 00:13, Patchwork wrote:


-:157: CHECK:MACRO_ARG_PRECEDENCE: Macro argument 'mode' may be better as 
'(mode)' to avoid precedence issues
#157: FILE: include/drm/drm_mm.h:430:
+#define drm_mm_for_each_suitable_hole(pos, mm, range_start, range_end, \
+ size, mode) \
+   for (pos = __drm_mm_first_hole(mm, range_start, range_end, size, \
+  mode & ~DRM_MM_INSERT_ONCE); \
+pos; \
+pos = mode & DRM_MM_INSERT_ONCE ? \
+NULL : __drm_mm_next_hole(mm, pos, size, \
+  mode & ~DRM_MM_INSERT_ONCE))


CI results are good I think but please do fix this warning.

Regards,

Tvrtko


Re: [Intel-gfx] [PATCH v2 2/2] drm/i915/dg2: Add debugfs to control global preemption setting

2022-03-07 Thread Tvrtko Ursulin



On 04/03/2022 23:46, Matt Roper wrote:

From: Akeem G Abodunrin 

Since DG2 and beyond only support global preemption enable/disable (see
Wa_14015141709), userspace no longer has a way to control preemption on
a per-context basis.  Preemption is globally enabled by default, but the
UMD teams have requested that we provide a debugfs interface that can be
used to query and adjust the system-wide preemption setting for
development and bug reporting purposes.


I guess most distros enable debugfs, anyone knows for sure? Otherwise 
the bug reporting use case could be questionable.


And UMD acks would be desirable here I'd say.


v2 (MattR):
  - Split debugfs out into a separate patch.  (Jani)
  - Add the hardware update/query as facilities in intel_gt.c and just
call them from the debugfs code.  (Jani)
  - Add register to GuC's save/restore list so that the value will
persist across resets.  (Tvrtko)
  - Place under per-GT debugfs rather than i915 debugfs.  (MattR)
  - Only register debugfs entries on platforms subject to Wa_14015141709,
and only on platforms that have an RCS engine.  (MattR/Tvrtko)

Cc: Matt Roper 
Cc: Prathap Kumar Valsan 
Cc: John Harrison 
Cc: Joonas Lahtinen 
Cc: Jani Nikula 
Cc: Tvrtko Ursulin 
Signed-off-by: Akeem G Abodunrin 
Signed-off-by: Matt Roper 
---
  drivers/gpu/drm/i915/gt/intel_gt.c | 50 ++
  drivers/gpu/drm/i915/gt/intel_gt.h |  3 ++
  drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 31 ++
  drivers/gpu/drm/i915/gt/intel_gt_regs.h|  3 ++
  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c |  7 +++
  5 files changed, 94 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8a2483ccbfb9..90bdebd8d267 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1045,3 +1045,53 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
	mutex_unlock(&gt->tlb_invalidate_lock);
  }
+
+/**
+ * intel_gt_get_global_preemption - return whether the global preemption
+ * setting is enabled in hardware
+ * @gt: GT structure
+ *
+ * Returns the hardware's global 'preemption enabled' setting.  Only relevant
+ * on newer platforms that lack per-context preemption control (and only on
+ * GTs that have a render engine).
+ *
+ * Returns 1 if preemption is enabled, 0 if disabled.
+ */
+u64 intel_gt_get_global_preemption(struct intel_gt *gt)
+{
+   intel_wakeref_t wakeref;
+   u32 val;
+
+   drm_WARN_ON(>i915->drm, GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 
55));
+   drm_WARN_ON(>i915->drm, RCS_MASK(gt) == 0);
+
+   with_intel_runtime_pm(>i915->runtime_pm, wakeref)
+   val = intel_uncore_read(gt->uncore, 
GEN12_VFG_PREEMPTION_CHICKEN);
+
+   return !(val & GEN12_VFG_PREEMPT_CHICKEN_DISABLE);
+}
+
+/**
+ * intel_gt_set_global_preemption - adjust global preemption enabled setting
+ * @gt: GT structure
+ * @val: desired preemption setting
+ *
+ * Enables global preemption if @val is non-zero, otherwise disables it.  Only
+ * relevant on newer platforms that lack per-context preemption control (and
+ * only on GTs that have a render engine).
+ *
+ * Returns 1 if preemption is enabled, 0 if disabled.
+ */
+void intel_gt_set_global_preemption(struct intel_gt *gt, u64 val)
+{
+   intel_wakeref_t wakeref;
+   u32 tmp = val ?
+   _MASKED_BIT_DISABLE(GEN12_VFG_PREEMPT_CHICKEN_DISABLE) :
+   _MASKED_BIT_ENABLE(GEN12_VFG_PREEMPT_CHICKEN_DISABLE);
+
+   drm_WARN_ON(>i915->drm, GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 
55));
+   drm_WARN_ON(>i915->drm, RCS_MASK(gt) == 0);


Bike shedding territory, but as long as these checks are present in 
release builds, it would be possible to return an error and propagate to 
debugfs caller/return. Not saying to do it just thinking out loud.



+
+   with_intel_runtime_pm(>i915->runtime_pm, wakeref)
+   intel_uncore_write(gt->uncore, GEN12_VFG_PREEMPTION_CHICKEN, 
tmp);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 0f571c8ee22b..63a599a1bf6d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -94,4 +94,7 @@ void intel_gt_watchdog_work(struct work_struct *work);
  
  void intel_gt_invalidate_tlbs(struct intel_gt *gt);
  
+u64 intel_gt_get_global_preemption(struct intel_gt *gt);

+void intel_gt_set_global_preemption(struct intel_gt *gt, u64 val);


Bool based would be nicer unless there is some reason for 64-bits in the 
future.


Regards,

Tvrtko


+
  #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c 
b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
index f103664b71d4..d851e3f80877 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
@@ -6,6 +6,7 @@
  #include 
  
  #include "i915_drv.h"

+#include 

Re: [Intel-gfx] [PATCH 2/2] drm/i915/gmbus: use to_intel_gmbus() instead of open coding

2022-03-07 Thread Jani Nikula
On Fri, 04 Mar 2022, Ville Syrjälä  wrote:
> On Fri, Mar 04, 2022 at 12:14:26PM +0200, Jani Nikula wrote:
>> We have a helper for getting at the enclosing gmbus struct from the
>> embedded i2c_adapter, use it.
>> 
>> Signed-off-by: Jani Nikula 
>
> Series is
> Reviewed-by: Ville Syrjälä 

Thanks, pushed.

BR,
Jani.

>
>> ---
>>  drivers/gpu/drm/i915/display/intel_gmbus.c | 18 +-
>>  1 file changed, 5 insertions(+), 13 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c 
>> b/drivers/gpu/drm/i915/display/intel_gmbus.c
>> index 8f26528c3dc7..21281a7bdc17 100644
>> --- a/drivers/gpu/drm/i915/display/intel_gmbus.c
>> +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
>> @@ -300,9 +300,7 @@ static void set_data(void *data, int state_high)
>>  static int
>>  intel_gpio_pre_xfer(struct i2c_adapter *adapter)
>>  {
>> -struct intel_gmbus *bus = container_of(adapter,
>> -   struct intel_gmbus,
>> -   adapter);
>> +struct intel_gmbus *bus = to_intel_gmbus(adapter);
>>  struct drm_i915_private *dev_priv = bus->dev_priv;
>>  
>>  intel_gmbus_reset(dev_priv);
>> @@ -319,9 +317,7 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter)
>>  static void
>>  intel_gpio_post_xfer(struct i2c_adapter *adapter)
>>  {
>> -struct intel_gmbus *bus = container_of(adapter,
>> -   struct intel_gmbus,
>> -   adapter);
>> +struct intel_gmbus *bus = to_intel_gmbus(adapter);
>>  struct drm_i915_private *dev_priv = bus->dev_priv;
>>  
>>  set_data(bus, 1);
>> @@ -619,9 +615,7 @@ static int
>>  do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num,
>>u32 gmbus0_source)
>>  {
>> -struct intel_gmbus *bus = container_of(adapter,
>> -   struct intel_gmbus,
>> -   adapter);
>> +struct intel_gmbus *bus = to_intel_gmbus(adapter);
>>  struct drm_i915_private *dev_priv = bus->dev_priv;
>>  int i = 0, inc, try = 0;
>>  int ret = 0;
>> @@ -751,8 +745,7 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct 
>> i2c_msg *msgs, int num,
>>  static int
>>  gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
>>  {
>> -struct intel_gmbus *bus =
>> -container_of(adapter, struct intel_gmbus, adapter);
>> +struct intel_gmbus *bus = to_intel_gmbus(adapter);
>>  struct drm_i915_private *dev_priv = bus->dev_priv;
>>  intel_wakeref_t wakeref;
>>  int ret;
>> @@ -776,8 +769,7 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg 
>> *msgs, int num)
>>  
>>  int intel_gmbus_output_aksv(struct i2c_adapter *adapter)
>>  {
>> -struct intel_gmbus *bus =
>> -container_of(adapter, struct intel_gmbus, adapter);
>> +struct intel_gmbus *bus = to_intel_gmbus(adapter);
>>  struct drm_i915_private *dev_priv = bus->dev_priv;
>>  u8 cmd = DRM_HDCP_DDC_AKSV;
>>  u8 buf[DRM_HDCP_KSV_LEN] = { 0 };
>> -- 
>> 2.30.2

-- 
Jani Nikula, Intel Open Source Graphics Center


[Intel-gfx] [PATCH v3 i-g-t] lib/intel_mmio: Fix mmapped resources not unmapped on fini

2022-03-07 Thread Janusz Krzysztofik
Commit 5f3cfa485eb4 ("lib: Use safe wrappers around libpciaccess
initialization functions") took care of not leaking memory allocated by
pci_system_init() but didn't take care of users potentially attempting to
reinitialize global data maintained by libpciaccess.  For example,
intel_register_access_init() mmaps device's PCI BAR0 resource with
pci_device_map_range() but intel_register_access_fini() doesn't unmap it
and next call to intel_register_access_init() fails on attempt to mmap it
again.

Fix it, and also provide intel_mmio_unmap_*() counterparts to public
functions intel_mmio_use_pci_bar() and intel_mmio_use_dump_file().

v2: apply last minute fixes, cached but unfortunately not committed before
sending
v3: use .pci_device_id field content as an indicator of arg initialization
via intel_register_access_init(),
  - improve checks of argument initialization status,
  - shorten warning messages (Kamil),
  - don't fill .mmio_size field until initialization succeeds (Kamil)

Signed-off-by: Janusz Krzysztofik 
Cc: Kamil Konieczny 
---
 lib/intel_io.h   |  4 +++
 lib/intel_mmio.c | 64 +---
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/lib/intel_io.h b/lib/intel_io.h
index 1cfe4fb6b9..ea2649d9bc 100644
--- a/lib/intel_io.h
+++ b/lib/intel_io.h
@@ -49,6 +49,8 @@ struct intel_register_map {
 
 struct intel_mmio_data {
void *igt_mmio;
+   size_t mmio_size;
+   struct pci_device *dev;
struct intel_register_map map;
uint32_t pci_device_id;
int key;
@@ -57,7 +59,9 @@ struct intel_mmio_data {
 
 void intel_mmio_use_pci_bar(struct intel_mmio_data *mmio_data,
struct pci_device *pci_dev);
+void intel_mmio_unmap_pci_bar(struct intel_mmio_data *mmio_data);
 void intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, char *file);
+void intel_mmio_unmap_dump_file(struct intel_mmio_data *mmio_data);
 
 int intel_register_access_init(struct intel_mmio_data *mmio_data,
   struct pci_device *pci_dev, int safe, int fd);
diff --git a/lib/intel_mmio.c b/lib/intel_mmio.c
index 667a69f5aa..d6ce0ee3ea 100644
--- a/lib/intel_mmio.c
+++ b/lib/intel_mmio.c
@@ -82,6 +82,8 @@ void *igt_global_mmio;
  * Sets also up mmio_data->igt_mmio to point at the data contained
  * in @file. This allows the same code to get reused for dumping and decoding
  * from running hardware as from register dumps.
+ *
+ * Users are expected to call intel_mmio_unmap_dump_file() after use.
  */
 void
 intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, char *file)
@@ -99,11 +101,32 @@ intel_mmio_use_dump_file(struct intel_mmio_data 
*mmio_data, char *file)
igt_fail_on_f(mmio_data->igt_mmio == MAP_FAILED,
  "Couldn't mmap %s\n", file);
 
+   mmio_data->mmio_size = st.st_size;
igt_global_mmio = mmio_data->igt_mmio;
 
close(fd);
 }
 
+/**
+ * intel_mmio_unmap_dump_file:
+ * @mmio_data:  mmio structure for IO operations
+ *
+ * Unmaps a dump file mmapped with intel_mmio_use_dump_file()
+ */
+void intel_mmio_unmap_dump_file(struct intel_mmio_data *mmio_data)
+{
+   if (igt_warn_on_f(mmio_data->dev,
+ "test bug: arg initialized with 
intel_mmio_use_pci_bar()\n"))
+   return;
+   if (igt_warn_on_f(!mmio_data->mmio_size,
+ "test bug: arg not initialized\n"))
+   return;
+
+   igt_global_mmio = NULL;
+   igt_debug_on(munmap(mmio_data->igt_mmio, mmio_data->mmio_size) < 0);
+   mmio_data->mmio_size = 0;
+}
+
 /**
  * intel_mmio_use_pci_bar:
  * @mmio_data:  mmio structure for IO operations
@@ -112,6 +135,8 @@ intel_mmio_use_dump_file(struct intel_mmio_data *mmio_data, 
char *file)
 * Fill a mmio_data structure with igt_mmio to point at the mmio bar.
  *
  * @pci_dev can be obtained from intel_get_pci_device().
+ *
+ * Users are expected to call intel_mmio_unmap_pci_bar() after use.
  */
 void
 intel_mmio_use_pci_bar(struct intel_mmio_data *mmio_data, struct pci_device 
*pci_dev)
@@ -141,10 +166,34 @@ intel_mmio_use_pci_bar(struct intel_mmio_data *mmio_data, 
struct pci_device *pci
  PCI_DEV_MAP_FLAG_WRITABLE,
			  &mmio_data->igt_mmio);
 
-   igt_global_mmio = mmio_data->igt_mmio;
-
igt_fail_on_f(error != 0,
  "Couldn't map MMIO region\n");
+
+   mmio_data->mmio_size = mmio_size;
+   mmio_data->dev = pci_dev;
+   igt_global_mmio = mmio_data->igt_mmio;
+}
+
+/**
+ * intel_mmio_unmap_pci_bar:
+ * @mmio_data:  mmio structure for IO operations
+ *
+ * Unmaps a PCI BAR region mmapped with intel_mmio_use_pci_bar()
+ */
+void intel_mmio_unmap_pci_bar(struct intel_mmio_data *mmio_data)
+{
+   if (igt_warn_on_f(mmio_data->pci_device_id,
+ "test bug: arg initialized with 
intel_register_access_init()\n"))
+   return;
+