> -----Original Message-----
> From: Srivatsa, Anusha <[email protected]>
> Sent: Thursday, October 28, 2021 2:04 PM
> To: Sripada, Radhakrishna <[email protected]>; intel-
> [email protected]
> Subject: RE: [Intel-gfx] [PATCH v9] drm/i915: Update memory bandwidth
> formulae
> 
> Replying to the right patch this time.
> From what the bspec says, the changes look good.
> Minor feedback below inline.
> 
> With that change,
> Reviewed-by: Anusha Srivatsa <[email protected]>
> 
> 
> > -----Original Message-----
> > From: Intel-gfx <[email protected]> On Behalf Of
> > Radhakrishna Sripada
> > Sent: Friday, October 15, 2021 2:01 PM
> > To: [email protected]
> > Subject: [Intel-gfx] [PATCH v9] drm/i915: Update memory bandwidth
> > formulae
> >
> > The formulae has been updated to include more variables. Make sure the
> > code carries the same.
> >
> > Bspec: 64631, 54023
> >
> > v2: Make GEN11 follow the default route and fix calculation of
> >     maxdebw(RK)
> > v3: Fix div by zero on default case
> >     Correct indent for fallthrough(Jani)
> > v4: Fix div by zero on gen11.
> > v5: Fix 0 max_numchannels case
> > v6:
> >     - Split gen11/gen12 algorithms
> >     - Fix RKL deburst value
> >     - Fix difference b/ween ICL and TGL algorithms
> >     - Protect deinterleave from being 0
> >     - Warn when numchannels exceeds max_numchannels
> >     - Fix scaling of clk_max from different units
> >     - s/deinterleave/channelwidth/ in calculating peakbw
> >     - Fix off by one for num_planes TGL+
> >     - Fix SAGV check
> > v7: Fix div by zero error on gen11
> > v8: Even though the algorithm for gen11 says that we need to return
> >     derated bw for a qgv point whose planes are less than no of active
> >     planes, we return 0 for deratedbw when only one plane is allowed.
> >     We modify the algorithm to accommodate the case where no of active
> >     planes are same as the min no of planes supported by a qgv point.
> > v9: Fix dclk scaling for dg1
> >
> > Cc: Ville Syrjälä <[email protected]>
> > Suggested-by: Matt Roper <[email protected]>
> > Signed-off-by: Radhakrishna Sripada <[email protected]>
> > ---
> >  drivers/gpu/drm/i915/display/intel_bw.c | 211 ++++++++++++++++++++----
> >  1 file changed, 179 insertions(+), 32 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_bw.c
> > b/drivers/gpu/drm/i915/display/intel_bw.c
> > index 8d9d888e9316..15c006194c85 100644
> > --- a/drivers/gpu/drm/i915/display/intel_bw.c
> > +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> > @@ -27,6 +27,9 @@ struct intel_qgv_info {
> >     u8 num_points;
> >     u8 num_psf_points;
> >     u8 t_bl;
> > +   u8 max_numchannels;
> > +   u8 channel_width;
> > +   u8 deinterleave;
> >  };
> >
> >  static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private
> > *dev_priv, @@ -42,7 +45,7 @@ static int
> > dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
> >             dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
> >     else
> >             dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
> > -   sp->dclk = dclk_ratio * dclk_reference;
> > +   sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) +
> > 500,
> > +1000);
> >
> >     val = intel_uncore_read(&dev_priv->uncore,
> > SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
> >     if (val & DG1_GEAR_TYPE)
> > @@ -69,6 +72,7 @@ static int icl_pcode_read_qgv_point_info(struct
> > drm_i915_private *dev_priv,
> >                                      int point)
> >  {
> >     u32 val = 0, val2 = 0;
> > +   u16 dclk;
> >     int ret;
> >
> >     ret = sandybridge_pcode_read(dev_priv, @@ -78,7 +82,8 @@ static
> > int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> >     if (ret)
> >             return ret;
> >
> > -   sp->dclk = val & 0xffff;
> > +   dclk = val & 0xffff;
> > +   sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv)
> > > 11 ?
> > +500 : 0), 1000);
> >     sp->t_rp = (val & 0xff0000) >> 16;
> >     sp->t_rcd = (val & 0xff000000) >> 24;
> >
> > @@ -133,7 +138,8 @@ int icl_pcode_restrict_qgv_points(struct
> > drm_i915_private *dev_priv,  }
> >
> >  static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > -                         struct intel_qgv_info *qi)
> > +                         struct intel_qgv_info *qi,
> > +                         bool is_y_tile)
> >  {
> >     const struct dram_info *dram_info = &dev_priv->dram_info;
> >     int i, ret;
> > @@ -144,17 +150,41 @@ static int icl_get_qgv_points(struct
> > drm_i915_private *dev_priv,
> >     if (DISPLAY_VER(dev_priv) == 12)
> >             switch (dram_info->type) {
> >             case INTEL_DRAM_DDR4:
> > -                   qi->t_bl = 4;
> > +                   qi->t_bl = is_y_tile ? 8 : 4;
> > +                   qi->max_numchannels = 2;
> > +                   qi->channel_width = 64;
> > +                   qi->deinterleave = is_y_tile ? 1 : 2;
> >                     break;
> >             case INTEL_DRAM_DDR5:
> > -                   qi->t_bl = 8;
> > +                   qi->t_bl = is_y_tile ? 16 : 8;
> > +                   qi->max_numchannels = 4;
> > +                   qi->channel_width = 32;
> > +                   qi->deinterleave = is_y_tile ? 1 : 2;
> > +                   break;
> > +           case INTEL_DRAM_LPDDR4:
> > +                   if (IS_ROCKETLAKE(dev_priv)) {
> > +                           qi->t_bl = 8;
> > +                           qi->max_numchannels = 4;
> > +                           qi->channel_width = 32;
> > +                           qi->deinterleave = 2;
> > +                           break;
> > +                   }
> > +                   fallthrough;
> > +           case INTEL_DRAM_LPDDR5:
> > +                   qi->t_bl = 16;
> > +                   qi->max_numchannels = 8;
> > +                   qi->channel_width = 16;
> > +                   qi->deinterleave = is_y_tile ? 2 : 4;
> >                     break;
> >             default:
> >                     qi->t_bl = 16;
> > +                   qi->max_numchannels = 1;
> >                     break;
> >             }
> > -   else if (DISPLAY_VER(dev_priv) == 11)
> > +   else if (DISPLAY_VER(dev_priv) == 11) {
> >             qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ?
> > 4 : 8;
> > +           qi->max_numchannels = 1;
> > +   }
> >
> >     if (drm_WARN_ON(&dev_priv->drm,
> >                     qi->num_points > ARRAY_SIZE(qi->points))) @@ -
> > 193,12 +223,6 @@ static int icl_get_qgv_points(struct drm_i915_private
> > *dev_priv,
> >     return 0;
> >  }
> >
> > -static int icl_calc_bw(int dclk, int num, int den) -{
> > -   /* multiples of 16.666MHz (100/6) */
> > -   return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > -}
> > -
> >  static int adl_calc_psf_bw(int clk)
> >  {
> >     /*
> > @@ -240,7 +264,7 @@ static const struct intel_sa_info tgl_sa_info = {  };
> >
> >  static const struct intel_sa_info rkl_sa_info = {
> > -   .deburst = 16,
> > +   .deburst = 8,
> >     .deprogbwlimit = 20, /* GB/s */
> >     .displayrtids = 128,
> >     .derating = 10,
> > @@ -265,35 +289,130 @@ static int icl_get_bw_info(struct drm_i915_private
> > *dev_priv, const struct intel
> >     struct intel_qgv_info qi = {};
> >     bool is_y_tile = true; /* assume y tile may be used */
> >     int num_channels = max_t(u8, 1, dev_priv-
> > >dram_info.num_channels);
> > -   int deinterleave;
> > -   int ipqdepth, ipqdepthpch;
> > +   int ipqdepth, ipqdepthpch = 16;
> >     int dclk_max;
> >     int maxdebw;
> > +   int num_groups = ARRAY_SIZE(dev_priv->max_bw);
> >     int i, ret;
> >
> > -   ret = icl_get_qgv_points(dev_priv, &qi);
> > +   ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
> >     if (ret) {
> >             drm_dbg_kms(&dev_priv->drm,
> >                         "Failed to get memory subsystem information,
> > ignoring bandwidth limits");
> >             return ret;
> >     }
> >
> > -   deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> >     dclk_max = icl_sagv_max_dclk(&qi);
> > +   maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
> > +   ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > +   qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > +
> > +   for (i = 0; i < num_groups; i++) {
> > +           struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > +           int clpchgroup;
> > +           int j;
> > +
> > +           clpchgroup = (sa->deburst * qi.deinterleave / num_channels)
> > << i;
> > +           bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> > +
> > +           bi->num_qgv_points = qi.num_points;
> > +           bi->num_psf_gv_points = qi.num_psf_points;
> > +
> > +           for (j = 0; j < qi.num_points; j++) {
> The j here can be more descriptive to make it easy to understand. s/j/sagv
> probably?
I am not sure if this case warrants a departure from standard convention of 
naming iterators.
This iterator j is used in the current version of the code and I am skeptical 
about changing it.

Thanks for the review,
Radhakrishna(RK) Sripada
> 
> Thanks,
> Anusha
> > +                   const struct intel_qgv_point *sp = &qi.points[j];
> > +                   int ct, bw;
> > +
> > +                   /*
> > +                    * Max row cycle time
> > +                    *
> > +                    * FIXME what is the logic behind the
> > +                    * assumed burst length?
> > +                    */
> > +                   ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> > +                              (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
> > +                   bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 *
> > num_channels, ct);
> >
> > -   ipqdepthpch = 16;
> > +                   bi->deratedbw[j] = min(maxdebw,
> > +                                          bw * (100 - sa->derating) / 100);
> > +
> > +                   drm_dbg_kms(&dev_priv->drm,
> > +                               "BW%d / QGV %d: num_planes=%d
> > deratedbw=%u\n",
> > +                               i, j, bi->num_planes, bi->deratedbw[j]);
> > +           }
> > +   }
> > +   /*
> > +    * In case if SAGV is disabled in BIOS, we always get 1
> > +    * SAGV point, but we can't send PCode commands to restrict it
> > +    * as it will fail and pointless anyway.
> > +    */
> > +   if (qi.num_points == 1)
> > +           dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
> > +   else
> > +           dev_priv->sagv_status = I915_SAGV_ENABLED;
> > +
> > +   return 0;
> > +}
> > +
> > +static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const
> > +struct intel_sa_info *sa) {
> > +   struct intel_qgv_info qi = {};
> > +   const struct dram_info *dram_info = &dev_priv->dram_info;
> > +   bool is_y_tile = true; /* assume y tile may be used */
> > +   int num_channels = max_t(u8, 1, dev_priv-
> > >dram_info.num_channels);
> > +   int ipqdepth, ipqdepthpch = 16;
> > +   int dclk_max;
> > +   int maxdebw, peakbw;
> > +   int clperchgroup;
> > +   int num_groups = ARRAY_SIZE(dev_priv->max_bw);
> > +   int i, ret;
> > +
> > +   ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
> > +   if (ret) {
> > +           drm_dbg_kms(&dev_priv->drm,
> > +                       "Failed to get memory subsystem information,
> > ignoring bandwidth limits");
> > +           return ret;
> > +   }
> > +
> > +   if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type ==
> > INTEL_DRAM_LPDDR5)
> > +           num_channels *= 2;
> > +
> > +   qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels,
> > +is_y_tile ? 4 : 2);
> > +
> > +   if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv)
> > >= 12)
> > +           qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);
> > +
> > +   if (DISPLAY_VER(dev_priv) > 11 && num_channels >
> > qi.max_numchannels)
> > +           drm_warn(&dev_priv->drm, "Number of channels exceeds
> > max number of channels.");
> > +   if (qi.max_numchannels != 0)
> > +           num_channels = min_t(u8, num_channels,
> > qi.max_numchannels);
> > +
> > +   dclk_max = icl_sagv_max_dclk(&qi);
> > +
> > +   peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) *
> > dclk_max;
> > +   maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60%
> > */
> >
> > -   maxdebw = min(sa->deprogbwlimit * 1000,
> > -                 icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> >     ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > +   /*
> > +    * clperchgroup = 4kpagespermempage * clperchperblock,
> > +    * clperchperblock = 8 / num_channels * interleave
> > +    */
> > +   clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) *
> > qi.deinterleave;
> >
> > -   for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +   for (i = 0; i < num_groups; i++) {
> >             struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > +           struct intel_bw_info *bi_next;
> >             int clpchgroup;
> >             int j;
> >
> > -           clpchgroup = (sa->deburst * deinterleave / num_channels) <<
> > i;
> > -           bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> > +           if (i < num_groups - 1)
> > +                   bi_next = &dev_priv->max_bw[i + 1];
> > +
> > +           clpchgroup = (sa->deburst * qi.deinterleave / num_channels)
> > << i;
> > +
> > +           if (i < num_groups - 1 && clpchgroup < clperchgroup)
> > +                   bi_next->num_planes = (ipqdepth - clpchgroup) /
> > clpchgroup + 1;
> > +           else
> > +                   bi_next->num_planes = 0;
> >
> >             bi->num_qgv_points = qi.num_points;
> >             bi->num_psf_gv_points = qi.num_psf_points; @@ -310,7
> > +429,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv,
> > const struct intel
> >                      */
> >                     ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> >                                (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
> > -                   bw = icl_calc_bw(sp->dclk, clpchgroup * 32 *
> > num_channels, ct);
> > +                   bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 *
> > num_channels, ct);
> >
> >                     bi->deratedbw[j] = min(maxdebw,
> >                                            bw * (100 - sa->derating) / 100);
> > @@ -329,9 +448,6 @@ static int icl_get_bw_info(struct drm_i915_private
> > *dev_priv, const struct intel
> >                                 "BW%d / PSF GV %d: num_planes=%d
> > bw=%u\n",
> >                                 i, j, bi->num_planes, bi->psf_bw[j]);
> >             }
> > -
> > -           if (bi->num_planes == 1)
> > -                   break;
> >     }
> >
> >     /*
> > @@ -395,6 +511,34 @@ static unsigned int icl_max_bw(struct
> > drm_i915_private *dev_priv,
> >     return 0;
> >  }
> >
> > +static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
> > +                          int num_planes, int qgv_point) {
> > +   int i;
> > +
> > +   /*
> > +    * Let's return max bw for 0 planes
> > +    */
> > +   num_planes = max(1, num_planes);
> > +
> > +   for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
> > +           const struct intel_bw_info *bi =
> > +                   &dev_priv->max_bw[i];
> > +
> > +           /*
> > +            * Pcode will not expose all QGV points when
> > +            * SAGV is forced to off/min/med/max.
> > +            */
> > +           if (qgv_point >= bi->num_qgv_points)
> > +                   return UINT_MAX;
> > +
> > +           if (num_planes <= bi->num_planes)
> > +                   return bi->deratedbw[qgv_point];
> > +   }
> > +
> > +   return dev_priv->max_bw[0].deratedbw[qgv_point];
> > +}
> > +
> >  static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
> >                            int psf_gv_point)
> >  {
> > @@ -412,13 +556,13 @@ void intel_bw_init_hw(struct drm_i915_private
> > *dev_priv)
> >     if (IS_DG2(dev_priv))
> >             dg2_get_bw_info(dev_priv);
> >     else if (IS_ALDERLAKE_P(dev_priv))
> > -           icl_get_bw_info(dev_priv, &adlp_sa_info);
> > +           tgl_get_bw_info(dev_priv, &adlp_sa_info);
> >     else if (IS_ALDERLAKE_S(dev_priv))
> > -           icl_get_bw_info(dev_priv, &adls_sa_info);
> > +           tgl_get_bw_info(dev_priv, &adls_sa_info);
> >     else if (IS_ROCKETLAKE(dev_priv))
> > -           icl_get_bw_info(dev_priv, &rkl_sa_info);
> > +           tgl_get_bw_info(dev_priv, &rkl_sa_info);
> >     else if (DISPLAY_VER(dev_priv) == 12)
> > -           icl_get_bw_info(dev_priv, &tgl_sa_info);
> > +           tgl_get_bw_info(dev_priv, &tgl_sa_info);
> >     else if (DISPLAY_VER(dev_priv) == 11)
> >             icl_get_bw_info(dev_priv, &icl_sa_info);  } @@ -746,7
> > +890,10 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
> >     for (i = 0; i < num_qgv_points; i++) {
> >             unsigned int max_data_rate;
> >
> > -           max_data_rate = icl_max_bw(dev_priv, num_active_planes,
> > i);
> > +           if (DISPLAY_VER(dev_priv) > 11)
> > +                   max_data_rate = tgl_max_bw(dev_priv,
> > num_active_planes, i);
> > +           else
> > +                   max_data_rate = icl_max_bw(dev_priv,
> > num_active_planes, i);
> >             /*
> >              * We need to know which qgv point gives us
> >              * maximum bandwidth in order to disable SAGV
> > --
> > 2.20.1

Reply via email to