On Sun, Nov 24, 2024 at 02:52:46AM +0530, Akhil P Oommen wrote:
> On Tue, Nov 19, 2024 at 06:56:40PM +0100, Neil Armstrong wrote:
> > The Adreno GPU Management Unit (GMU) can also scale DDR Bandwidth along
> > with the Frequency and Power Domain level, but by default we let the
> > OPP core scale the interconnect ddr path.
> > 
> > In order to calculate vote values used by the GPU Management
> > Unit (GMU), we need to parse all the possible OPP Bandwidths and
> 
> GMU expects a table of votes for each DDR frequency corner. Can we
> please try to figure out a way to do that? Generally, we should ensure the
> data that is sent to GMU firmware matches downstream exactly. Because,
> when something breaks in firmware or worse, at SoC level, it will be pretty
> hard to narrow down the issue. So, I prefer to be very conservative about
> this.
> 
> KGSL keeps the ddr frequency table in the devicetree. That helps to keep
> the driver lean, but I am not sure if that is viable upstream.
> 
> -Akhil.
> 
> > create a vote value to be sent to the appropriate Bus Control
> > Modules (BCMs) declared in the GPU info struct.
> > 
> > The vote array will then be used to dynamically generate the GMU
> > bw_table sent during the GMU power-up.
> > 
> > Signed-off-by: Neil Armstrong <neil.armstr...@linaro.org>
> > ---
> >  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 153 
> > ++++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/msm/adreno/a6xx_gmu.h |  14 ++++
> >  drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   1 +
> >  3 files changed, 168 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
> > b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> > index 
> > 14db7376c712d19446b38152e480bd5a1e0a5198..f6814d92a4edb29ba8a34a34aabb8b2324e9c6a4
> >  100644
> > --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> > @@ -9,6 +9,7 @@
> >  #include <linux/pm_domain.h>
> >  #include <linux/pm_opp.h>
> >  #include <soc/qcom/cmd-db.h>
> > +#include <soc/qcom/tcs.h>
> >  #include <drm/drm_gem.h>
> >  
> >  #include "a6xx_gpu.h"
> > @@ -1287,6 +1288,109 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu 
> > *gmu)
> >     return 0;
> >  }
> >  
> > +/**
> > + * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager 
> > (BCM)
> > + * @unit: divisor used to convert bytes/sec bw value to an RPMh msg
> > + * @width: multiplier used to convert bytes/sec bw value to an RPMh msg
> > + * @vcd: virtual clock domain that this bcm belongs to
> > + * @reserved: reserved field
> > + */
> > +struct bcm_db {
> > +   __le32 unit;
> > +   __le16 width;
> > +   u8 vcd;
> > +   u8 reserved;
> > +};

Shouldn't this be a packed struct? It is naturally aligned, but still!

> > +
> > +static u64 bcm_div(u64 num, u32 base)
> > +{
> > +   /* Ensure that small votes aren't lost. */
> > +   if (num && num < base)
> > +           return 1;
> > +
> > +   do_div(num, base);
> > +
> > +   return num;
> > +}
> > +
> > +static int a6xx_gmu_rpmh_bw_votes_init(const struct a6xx_info *info,
> > +                                  struct a6xx_gmu *gmu)
> > +{
> > +   const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 };
> > +   unsigned int bcm_index, bw_index;
> > +
> > +   /* Retrieve BCM data from cmd-db */
> > +   for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) {
> > +           size_t count;
> > +
> > +           /* Skip unconfigured BCM */
> > +           if (!info->bcm[bcm_index].name)
> > +                   continue;
> > +
> > +           bcm_data[bcm_index] = cmd_db_read_aux_data(
> > +                                           info->bcm[bcm_index].name,
> > +                                           &count);
> > +           if (IS_ERR(bcm_data[bcm_index]))
> > +                   return PTR_ERR(bcm_data[bcm_index]);
> > +
> > +           if (!count)
> > +                   return -EINVAL;
> > +   }
> > +
> > +   /* Generate BCM votes values for each bandwidth & BCM */
> > +   for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) {
> > +           u32 *data = gmu->gpu_bw_votes[bw_index];
> > +           u32 bw = gmu->gpu_bw_table[bw_index];
> > +
> > +           /* Calculations loosely copied from bcm_aggregate() & 
> > tcs_cmd_gen() */
> > +           for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) {
> > +                   bool commit = false;
> > +                   u64 peak, vote;
> > +                   u16 width;
> > +                   u32 unit;
> > +
> > +                   /* Skip unconfigured BCM */
> > +                   if (!info->bcm[bcm_index].name || !bcm_data[bcm_index])
> > +                           continue;
> > +
> > +                   if (bcm_index == GMU_MAX_BCMS - 1 ||
> > +                       (bcm_data[bcm_index + 1] &&
> > +                        bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 
> > 1]->vcd))
> > +                           commit = true;
> > +
> > +                   if (!bw) {
> > +                           data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 
> > 0);
> > +                           continue;
> > +                   }
> > +
> > +                   if (info->bcm[bcm_index].fixed) {
> > +                           u32 perfmode = 0;
> > +
> > +                           if (bw >= info->bcm[bcm_index].perfmode_bw)
> > +                                   perfmode = 
> > info->bcm[bcm_index].perfmode;
> > +
> > +                           data[bcm_index] = BCM_TCS_CMD(commit, true, 0, 
> > perfmode);
> > +                           continue;
> > +                   }
> > +
> > +                   /* Multiply the bandwidth by the width of the 
> > connection */
> > +                   width = le16_to_cpu(bcm_data[bcm_index]->width);
> > +                   peak = bcm_div((u64)bw * width, 
> > info->bcm[bcm_index].buswidth);
> > +
> > +                   /* Input bandwidth value is in KBps, scale the value to 
> > BCM unit */
> > +                   unit = le32_to_cpu(bcm_data[bcm_index]->unit);
> > +                   vote = bcm_div(peak * 1000ULL, unit);
> > +
> > +                   if (vote > BCM_TCS_CMD_VOTE_MASK)
> > +                           vote = BCM_TCS_CMD_VOTE_MASK;
> > +
> > +                   data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote);
> > +           }
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> >  /* Return the 'arc-level' for the given frequency */
> >  static unsigned int a6xx_gmu_get_arc_level(struct device *dev,
> >                                        unsigned long freq)
> > @@ -1390,12 +1494,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct 
> > device *dev, u32 *votes,
> >   * The GMU votes with the RPMh for itself and on behalf of the GPU but we 
> > need
> >   * to construct the list of votes on the CPU and send it over. Query the 
> > RPMh
> >   * voltage levels and build the votes
> > + * The GMU can also vote for DDR interconnects, use the OPP bandwidth 
> > entries
> > + * and BCM parameters to build the votes.
> >   */
> >  
> >  static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
> >  {
> >     struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
> >     struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
> > +   const struct a6xx_info *info = adreno_gpu->info->a6xx;
> >     struct msm_gpu *gpu = &adreno_gpu->base;
> >     int ret;
> >  
> > @@ -1407,6 +1514,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu 
> > *gmu)
> >     ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes,
> >             gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl");
> >  
> > +   /* Build the interconnect votes */
> > +   if (adreno_gpu->info->features & ADRENO_FEAT_GMU_BW_VOTE)
> > +           ret |= a6xx_gmu_rpmh_bw_votes_init(info, gmu);
> > +
> >     return ret;
> >  }
> >  
> > @@ -1442,6 +1553,38 @@ static int a6xx_gmu_build_freq_table(struct device 
> > *dev, unsigned long *freqs,
> >     return index;
> >  }
> >  
> > +static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long 
> > *bandwidths,
> > +           u32 size)
> > +{
> > +   int count = dev_pm_opp_get_opp_count(dev);

I suppose this doesn't count the OPPs which are not supported by the
SKU. If we can go through *all* OPPs in the OPP table irrespective of
the SKU, we will get something close to the full DDR bw table I mentioned
in the previous mail.

> > +   struct dev_pm_opp *opp;
> > +   int i, index = 0;
> > +   unsigned int bandwidth = 1;
> > +
> > +   /*
> > +    * The OPP table doesn't contain the "off" bandwidth level so we need to
> > +    * add 1 to the table size to account for it
> > +    */
> > +
> > +   if (WARN(count + 1 > size,
> > +           "The GMU bandwidth table is being truncated\n"))
> > +           count = size - 1;
> > +
> > +   /* Set the "off" bandwidth */
> > +   bandwidths[index++] = 0;
> > +
> > +   for (i = 0; i < count; i++) {
> > +           opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0);
> > +           if (IS_ERR(opp))
> > +                   break;
> > +
> > +           dev_pm_opp_put(opp);
> > +           bandwidths[index++] = bandwidth++;
> > +   }
> > +
> > +   return index;
> > +}
> > +
> >  static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
> >  {
> >     struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
> > @@ -1472,6 +1615,16 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu 
> > *gmu)
> >  
> >     gmu->current_perf_index = gmu->nr_gpu_freqs - 1;
> >  
> > +   /*
> > +    * The GMU also handles GPU Interconnect Votes so build a list
> > +    * of DDR bandwidths from the GPU OPP table
> > +    */
> > +   if (adreno_gpu->info->features & ADRENO_FEAT_GMU_BW_VOTE)
> > +           gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev,
> > +                   gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table));
> > +
> > +   gmu->current_perf_index = gmu->nr_gpu_freqs - 1;

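Duplicate line: gmu->current_perf_index is already assigned just before
the new bandwidth-table block, so this second assignment can be dropped.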

> > +
> >     /* Build the list of RPMh votes that we'll send to the GMU */
> >     return a6xx_gmu_rpmh_votes_init(gmu);
> >  }
> > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h 
> > b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> > index 
> > b4a79f88ccf45cfe651c86d2a9da39541c5772b3..03603eadc0f9ed866899c95e99f333a511ebc3c1
> >  100644
> > --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> > @@ -19,6 +19,16 @@ struct a6xx_gmu_bo {
> >     u64 iova;
> >  };
> >  
> > +#define GMU_MAX_BCMS       3
> > +
> > +struct a6xx_bcm {
> > +   char *name;
> > +   unsigned int buswidth;
> > +   bool fixed;
> > +   unsigned int perfmode;
> > +   unsigned int perfmode_bw;
> > +};
> > +
> >  /*
> >   * These define the different GMU wake up options - these define how both 
> > the
> >   * CPU and the GMU bring up the hardware
> > @@ -82,6 +92,10 @@ struct a6xx_gmu {
> >     unsigned long gpu_freqs[16];
> >     u32 gx_arc_votes[16];
> >  
> > +   int nr_gpu_bws;
> > +   unsigned long gpu_bw_table[16];
> > +   u32 gpu_bw_votes[16][GMU_MAX_BCMS];
> > +
> >     int nr_gmu_freqs;
> >     unsigned long gmu_freqs[4];
> >     u32 cx_arc_votes[4];
> > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
> > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> > index 
> > 4aceffb6aae89c781facc2a6e4a82b20b341b6cb..5b80919e595fa1ba0a3afcca55feb89e60870cb1
> >  100644
> > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> > @@ -44,6 +44,7 @@ struct a6xx_info {
> >     u32 gmu_chipid;
> >     u32 gmu_cgc_mode;
> >     u32 prim_fifo_threshold;
> > +   const struct a6xx_bcm bcm[GMU_MAX_BCMS];

This table is duplicated a lot. Let's keep a pointer instead. We can probably use
this pointer as a flag to check for the GMU_IB_VOTE support too.

-Akhil

> >  };
> >  
> >  struct a6xx_gpu {
> > 
> > -- 
> > 2.34.1
> > 

Reply via email to