[PATCH v3 1/6] dt-bindings: drm/msm/gpu: Document gpu opp table

2020-06-05 Thread Sharat Masetty
Update documentation to list the gpu opp table bindings including the
newly added "opp-peak-kBps" needed for GPU-DDR bandwidth scaling.

Signed-off-by: Sharat Masetty 
Acked-by: Rob Herring 
---
 .../devicetree/bindings/display/msm/gpu.txt| 28 ++
 1 file changed, 28 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt 
b/Documentation/devicetree/bindings/display/msm/gpu.txt
index 70025cb..48bd4ab 100644
--- a/Documentation/devicetree/bindings/display/msm/gpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/gpu.txt
@@ -79,6 +79,34 @@ Example a6xx (with GMU):

interconnects = <_hlos MASTER_GFX3D _hlos SLAVE_EBI1>;

+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = ;
+   opp-peak-kBps = <5412000>;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   opp-peak-kBps = <3072000>;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = ;
+   opp-peak-kBps = <3072000>;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = ;
+   opp-peak-kBps = <1804000>;
+   };
+   };
+
qcom,gmu = <>;

zap-shader {
--
2.7.4



[PATCH v3 3/6] drm: msm: a6xx: use dev_pm_opp_set_bw to scale DDR

2020-06-05 Thread Sharat Masetty
This patch replaces the previously used static DDR vote and uses
dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling
GPU frequency. Also since the icc path voting is handled completely
in the opp driver, remove the icc_path handle and its usage in the
drm driver.
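
For reference, here is a minimal sketch of the voting pattern this change
moves to (the helper names gpu_vote_ddr_bw/gpu_drop_ddr_bw are made up for
illustration and are not part of this patch; error handling is trimmed):

#include <linux/pm_opp.h>

/* Vote the GPU->DDR bandwidth associated with the OPP matching @freq. */
static int gpu_vote_ddr_bw(struct device *dev, unsigned long freq)
{
	struct dev_pm_opp *opp;
	int ret;

	opp = dev_pm_opp_find_freq_exact(dev, freq, true);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	/* Applies the OPP's opp-peak-kBps over the icc path(s) held by the OPP core */
	ret = dev_pm_opp_set_bw(dev, opp);
	dev_pm_opp_put(opp);

	return ret;
}

/* Dropping the vote (e.g. at GMU stop): a NULL OPP clears the bandwidth request. */
static void gpu_drop_ddr_bw(struct device *dev)
{
	dev_pm_opp_set_bw(dev, NULL);
}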

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c   | 23 ---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  8 
 drivers/gpu/drm/msm/msm_gpu.h   |  2 --
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2d8124b..1dd8fc5 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct 
dev_pm_opp *opp)

gmu->freq = gmu->gpu_freqs[perf_index];

-   /*
-* Eventually we will want to scale the path vote with the frequency but
-* for now leave it at max so that the performance is nominal.
-*/
-   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+   dev_pm_opp_set_bw(>pdev->dev, opp);
 }

 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
@@ -715,6 +711,19 @@ static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, 
struct a6xx_gmu *gmu)
dev_pm_opp_put(gpu_opp);
 }

+static void a6xx_gmu_set_initial_bw(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
+{
+   struct dev_pm_opp *gpu_opp;
+   unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index];
+
+   gpu_opp = dev_pm_opp_find_freq_exact(>pdev->dev, gpu_freq, true);
+   if (IS_ERR_OR_NULL(gpu_opp))
+   return;
+
+   dev_pm_opp_set_bw(>pdev->dev, gpu_opp);
+   dev_pm_opp_put(gpu_opp);
+}
+
 int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 {
struct adreno_gpu *adreno_gpu = _gpu->base;
@@ -739,7 +748,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
}

/* Set the bus quota to a reasonable value for boot */
-   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(3072));
+   a6xx_gmu_set_initial_bw(gpu, gmu);

/* Enable the GMU interrupt */
gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0);
@@ -907,7 +916,7 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
a6xx_gmu_shutdown(gmu);

/* Remove the bus vote */
-   icc_set_bw(gpu->icc_path, 0, 0);
+   dev_pm_opp_set_bw(>pdev->dev, NULL);

/*
 * Make sure the GX domain is off before turning off the GMU (CX)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 2d13694..718c705 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -913,11 +913,6 @@ static int adreno_get_pwrlevels(struct device *dev,

DBG("fast_rate=%u, slow_rate=2700", gpu->fast_rate);

-   /* Check for an interconnect path for the bus */
-   gpu->icc_path = of_icc_get(dev, NULL);
-   if (IS_ERR(gpu->icc_path))
-   gpu->icc_path = NULL;
-
return 0;
 }

@@ -958,13 +953,10 @@ int adreno_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,

 void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
 {
-   struct msm_gpu *gpu = _gpu->base;
unsigned int i;

for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++)
release_firmware(adreno_gpu->fw[i]);

-   icc_put(gpu->icc_path);
-
msm_gpu_cleanup(_gpu->base);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index cf0dc6d..c7d74a9 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -112,8 +112,6 @@ struct msm_gpu {
struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
uint32_t fast_rate;

-   struct icc_path *icc_path;
-
/* Hang and Inactivity Detection:
 */
 #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
--
2.7.4



[PATCH v3 4/6] arm64: dts: qcom: SDM845: Enable GPU DDR bw scaling

2020-06-05 Thread Sharat Masetty
This patch adds the interconnects property to the gpu node and the
opp-peak-kBps property to the opps in the gpu opp table. This should
enable scaling the DDR bandwidth dynamically and proportionally to the
GPU frequency.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 11fc3f24..6ea6f54 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -3240,6 +3240,8 @@

qcom,gmu = <>;

+   interconnects = <_noc MASTER_GFX3D _noc 
SLAVE_EBI1>;
+
zap_shader: zap-shader {
memory-region = <_mem>;
};
@@ -3250,36 +3252,43 @@
opp-71000 {
opp-hz = /bits/ 64 <71000>;
opp-level = 
;
+   opp-peak-kBps = <7216000>;
};

opp-67500 {
opp-hz = /bits/ 64 <67500>;
opp-level = 
;
+   opp-peak-kBps = <7216000>;
};

opp-59600 {
opp-hz = /bits/ 64 <59600>;
opp-level = 
;
+   opp-peak-kBps = <622>;
};

opp-52000 {
opp-hz = /bits/ 64 <52000>;
opp-level = ;
+   opp-peak-kBps = <622>;
};

opp-41400 {
opp-hz = /bits/ 64 <41400>;
opp-level = 
;
+   opp-peak-kBps = <4068000>;
};

opp-34200 {
opp-hz = /bits/ 64 <34200>;
opp-level = ;
+   opp-peak-kBps = <2724000>;
};

opp-25700 {
opp-hz = /bits/ 64 <25700>;
opp-level = 
;
+   opp-peak-kBps = <1648000>;
};
};
};
--
2.7.4



[PATCH v3 5/6] arm64: dts: qcom: sc7180: Add interconnects property for GPU

2020-06-05 Thread Sharat Masetty
This patch adds the interconnects property to the GPU node. This enables
the GPU->DDR path bandwidth voting.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index eaede5e..34004ad 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1497,6 +1497,8 @@
operating-points-v2 = <_opp_table>;
qcom,gmu = <>;

+   interconnects = <_noc MASTER_GFX3D _virt 
SLAVE_EBI1>;
+
gpu_opp_table: opp-table {
compatible = "operating-points-v2";

--
2.7.4



[PATCH v3 6/6] arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp

2020-06-05 Thread Sharat Masetty
Add opp-peak-kBps bindings to the GPU opp table, listing the peak
GPU -> DDR bandwidth requirement for each opp level. This will be
used to scale the DDR bandwidth along with the GPU frequency dynamically.

Signed-off-by: Sharat Masetty 
Reviewed-by: Matthias Kaehlcke 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 34004ad..7bef42b 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1505,36 +1505,43 @@
opp-8 {
opp-hz = /bits/ 64 <8>;
opp-level = 
;
+   opp-peak-kBps = <8532000>;
};

opp-65000 {
opp-hz = /bits/ 64 <65000>;
opp-level = 
;
+   opp-peak-kBps = <7216000>;
};

opp-56500 {
opp-hz = /bits/ 64 <56500>;
opp-level = ;
+   opp-peak-kBps = <5412000>;
};

opp-43000 {
opp-hz = /bits/ 64 <43000>;
opp-level = 
;
+   opp-peak-kBps = <5412000>;
};

opp-35500 {
opp-hz = /bits/ 64 <35500>;
opp-level = ;
+   opp-peak-kBps = <3072000>;
};

opp-26700 {
opp-hz = /bits/ 64 <26700>;
opp-level = 
;
+   opp-peak-kBps = <3072000>;
};

opp-18000 {
opp-hz = /bits/ 64 <18000>;
opp-level = 
;
+   opp-peak-kBps = <1804000>;
};
};
};
--
2.7.4



[PATCH v3 2/6] drm: msm: a6xx: send opp instead of a frequency

2020-06-05 Thread Sharat Masetty
This patch changes the plumbing to send the devfreq recommended opp rather
than the frequency. Also consolidate and rearrange the code in a6xx to set
the GPU frequency and the icc vote in preparation for the upcoming
changes for GPU->DDR scaling votes.
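
The msm_gpu.c side of this plumbing is not shown in full in this excerpt; a
rough sketch of what the devfreq target hook looks like after this change
(details are illustrative and may not match the patch exactly) is:

static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	struct dev_pm_opp *opp;

	/* Let devfreq/OPP pick the OPP for the requested frequency */
	opp = devfreq_recommended_opp(dev, freq, flags);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	/* Pass the OPP itself down to the backend instead of a raw frequency */
	if (gpu->funcs->gpu_set_freq)
		gpu->funcs->gpu_set_freq(gpu, opp);
	else
		clk_set_rate(gpu->core_clk, *freq);

	dev_pm_opp_put(opp);

	return 0;
}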

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 62 +++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  2 +-
 drivers/gpu/drm/msm/msm_gpu.c |  3 +-
 drivers/gpu/drm/msm/msm_gpu.h |  3 +-
 4 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 748cd37..2d8124b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -100,17 +100,30 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
 }

-static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
+void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
 {
-   struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
-   struct adreno_gpu *adreno_gpu = _gpu->base;
-   struct msm_gpu *gpu = _gpu->base;
-   int ret;
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_gmu *gmu = _gpu->gmu;
+   u32 perf_index;
+   unsigned long gpu_freq;
+   int ret = 0;
+
+   gpu_freq = dev_pm_opp_get_freq(opp);
+
+   if (gpu_freq == gmu->freq)
+   return;
+
+   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
+   if (gpu_freq == gmu->gpu_freqs[perf_index])
+   break;
+
+   gmu->current_perf_index = perf_index;

gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);

gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
-   ((3 & 0xf) << 28) | index);
+   ((3 & 0xf) << 28) | perf_index);

/*
 * Send an invalid index as a vote for the bus bandwidth and let the
@@ -126,7 +139,7 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)
if (ret)
dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret);

-   gmu->freq = gmu->gpu_freqs[index];
+   gmu->freq = gmu->gpu_freqs[perf_index];

/*
 * Eventually we will want to scale the path vote with the frequency but
@@ -135,25 +148,6 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)
icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
 }

-void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
-{
-   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-   struct a6xx_gmu *gmu = _gpu->gmu;
-   u32 perf_index = 0;
-
-   if (freq == gmu->freq)
-   return;
-
-   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
-   if (freq == gmu->gpu_freqs[perf_index])
-   break;
-
-   gmu->current_perf_index = perf_index;
-
-   __a6xx_gmu_set_freq(gmu, perf_index);
-}
-
 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -708,6 +702,19 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
a6xx_gmu_rpmh_off(gmu);
 }

+static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu 
*gmu)
+{
+   struct dev_pm_opp *gpu_opp;
+   unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index];
+
+   gpu_opp = dev_pm_opp_find_freq_exact(>pdev->dev, gpu_freq, true);
+   if (IS_ERR_OR_NULL(gpu_opp))
+   return;
+
+   a6xx_gmu_set_freq(gpu, gpu_opp);
+   dev_pm_opp_put(gpu_opp);
+}
+
 int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 {
struct adreno_gpu *adreno_gpu = _gpu->base;
@@ -759,8 +766,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK);
enable_irq(gmu->hfi_irq);

-   /* Set the GPU to the current freq */
-   __a6xx_gmu_set_freq(gmu, gmu->current_perf_index);
+   a6xx_gmu_set_initial_freq(gpu, gmu);

/*
 * "enable" the GX power domain which won't actually do anything but it
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 7239b8b..03ba60d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -63,7 +63,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum 
a6xx_gmu_oob_state state);
 int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
 void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);

-void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq);
+void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp);

[PATCH v3 0/6] Add support for GPU DDR BW scaling

2020-06-05 Thread Sharat Masetty
This is a respin of [1]. Incorporated review feedback and fixed issues observed
during testing. Picked up Georgi's series from opp/linux-next [2]; this series
also depends on a helper function needed to set and clear the DDR bandwidth
vote [3]. Patch 4 in the series adds support for SDM845 as well, but it is not
tested yet (WIP); the SC7180 patches are well tested now.

[1] https://patchwork.freedesktop.org/series/75291/
[2] 
https://kernel.googlesource.com/pub/scm/linux/kernel/git/vireshk/pm/+log/opp/linux-next/
[3] https://patchwork.kernel.org/patch/11590563/

Sharat Masetty (6):
  dt-bindings: drm/msm/gpu: Document gpu opp table
  drm: msm: a6xx: send opp instead of a frequency
  drm: msm: a6xx: use dev_pm_opp_set_bw to scale DDR
  arm64: dts: qcom: SDM845: Enable GPU DDR bw scaling
  arm64: dts: qcom: sc7180: Add interconnects property for GPU
  arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp

 .../devicetree/bindings/display/msm/gpu.txt| 28 +++
 arch/arm64/boot/dts/qcom/sc7180.dtsi   |  9 +++
 arch/arm64/boot/dts/qcom/sdm845.dtsi   |  9 +++
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c  | 85 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h  |  2 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c|  8 --
 drivers/gpu/drm/msm/msm_gpu.c  |  3 +-
 drivers/gpu/drm/msm/msm_gpu.h  |  5 +-
 8 files changed, 100 insertions(+), 49 deletions(-)

--
2.7.4



Re: [Freedreno] [PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth

2020-05-28 Thread Sharat Masetty



On 5/27/2020 9:08 PM, Rob Clark wrote:

On Wed, May 27, 2020 at 1:47 AM Sharat Masetty  wrote:

+ more folks

On 5/18/2020 9:55 PM, Rob Clark wrote:

On Mon, May 18, 2020 at 7:23 AM Jordan Crouse  wrote:

On Thu, May 14, 2020 at 04:24:18PM +0530, Sharat Masetty wrote:

This patch replaces the previously used static DDR vote and uses
dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling
GPU frequency.

Signed-off-by: Sharat Masetty 
---
   drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +-
   1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2d8124b..79433d3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct 
dev_pm_opp *opp)

gmu->freq = gmu->gpu_freqs[perf_index];

- /*
-  * Eventually we will want to scale the path vote with the frequency but
-  * for now leave it at max so that the performance is nominal.
-  */
- icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+ dev_pm_opp_set_bw(>pdev->dev, opp);
   }

This adds an implicit requirement that all targets need bandwidth settings
defined in the OPP or they won't get a bus vote at all. I would prefer that
there be a default escape valve, but if not you'll need to add
bandwidth values for the sdm845 OPPs so that target doesn't regress.


it looks like we could maybe do something like:

ret = dev_pm_opp_set_bw(...);
if (ret) {
dev_warn_once(dev, "no bandwidth settings");
icc_set_bw(...);
}

?

BR,
-R

There is a bit of an issue here - it looks like it's not possible to have two
icc handles to the same path. It causes double enumeration of the paths
in the icc core and messes up the path votes. With [1], since opp/core
already gets a handle to the icc path as part of the table add, drm/msm
could do either:

a) Conditionally enumerate the gpu->icc_path handle only when the pm/opp core
has not already got the icc path handle. I could use something like [2] to
determine whether gpu->icc_path should be initialized.

b) Add opp-peak-kBps configs in the 845 dt and mandate that all future versions
use these bindings. With this, I can remove gpu->icc_path from drm/msm
completely and rely only on opp/core for bw voting.

The main thing is that we want to make sure newer dtb always works on
an older kernel without regression.. but, hmm..  I guess the
interconnects/interconnects-names properties haven't landed yet in
sdm845.dtsi?  Maybe that lets us go with the simpler approach (b).
Looks like we haven't wired up interconnect for 8916 or 8996 either,
so probably we can just mandate this for all of them?


I checked all three 845, 820 and 8916 and none of them have the 
interconnect configs for GPU. So, I think we are good here. I'll go with 
option (b) and re-spin v3. Adding interconnects and opp-peak-kBps 
configs for previous chips can be taken up as a separate activity.


Sharat


If we have landed the interconnect dts hookup for gpu somewhere that
I'm overlooking, I guess we would have to go with (a) and keep the
existing interconnects/interconnects-names properties.

BR,
-R


[1] - https://lore.kernel.org/patchwork/cover/1240687/

[2] - https://patchwork.kernel.org/patch/11527573/

Let me know your thoughts

Sharat


Jordan


   unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
--
2.7.4


--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [Freedreno] [PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth

2020-05-27 Thread Sharat Masetty

+ more folks

On 5/18/2020 9:55 PM, Rob Clark wrote:

On Mon, May 18, 2020 at 7:23 AM Jordan Crouse  wrote:

On Thu, May 14, 2020 at 04:24:18PM +0530, Sharat Masetty wrote:

This patch replaces the previously used static DDR vote and uses
dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling
GPU frequency.

Signed-off-by: Sharat Masetty 
---
  drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +-
  1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2d8124b..79433d3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct 
dev_pm_opp *opp)

   gmu->freq = gmu->gpu_freqs[perf_index];

- /*
-  * Eventually we will want to scale the path vote with the frequency but
-  * for now leave it at max so that the performance is nominal.
-  */
- icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+ dev_pm_opp_set_bw(>pdev->dev, opp);
  }

This adds an implicit requirement that all targets need bandwidth settings
defined in the OPP or they won't get a bus vote at all. I would prefer that
there be a default escape valve, but if not you'll need to add
bandwidth values for the sdm845 OPPs so that target doesn't regress.


it looks like we could maybe do something like:

   ret = dev_pm_opp_set_bw(...);
   if (ret) {
   dev_warn_once(dev, "no bandwidth settings");
   icc_set_bw(...);
   }

?

BR,
-R


There is a bit of an issue here - it looks like it's not possible to have two
icc handles to the same path. It causes double enumeration of the paths
in the icc core and messes up the path votes. With [1], since opp/core
already gets a handle to the icc path as part of the table add, drm/msm
could do either:

a) Conditionally enumerate the gpu->icc_path handle only when the pm/opp core
has not already got the icc path handle. I could use something like [2] to
determine whether gpu->icc_path should be initialized.

b) Add opp-peak-kBps configs in the 845 dt and mandate that all future versions
use these bindings. With this, I can remove gpu->icc_path from drm/msm
completely and rely only on opp/core for bw voting.


[1] - https://lore.kernel.org/patchwork/cover/1240687/

[2] - https://patchwork.kernel.org/patch/11527573/

Let me know your thoughts

Sharat




Jordan


  unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
--
2.7.4


--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH 4/6] drm: msm: a6xx: send opp instead of a frequency

2020-05-14 Thread Sharat Masetty
This patch changes the plumbing to send the devfreq recommended opp rather
than the frequency. Also consolidate and rearrange the code in a6xx to set
the GPU frequency and the icc vote in preparation for the upcoming
changes for GPU->DDR scaling votes.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 62 +++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |  2 +-
 drivers/gpu/drm/msm/msm_gpu.c |  3 +-
 drivers/gpu/drm/msm/msm_gpu.h |  3 +-
 4 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 748cd37..2d8124b 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -100,17 +100,30 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
 }

-static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
+void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
 {
-   struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
-   struct adreno_gpu *adreno_gpu = _gpu->base;
-   struct msm_gpu *gpu = _gpu->base;
-   int ret;
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+   struct a6xx_gmu *gmu = _gpu->gmu;
+   u32 perf_index;
+   unsigned long gpu_freq;
+   int ret = 0;
+
+   gpu_freq = dev_pm_opp_get_freq(opp);
+
+   if (gpu_freq == gmu->freq)
+   return;
+
+   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
+   if (gpu_freq == gmu->gpu_freqs[perf_index])
+   break;
+
+   gmu->current_perf_index = perf_index;

gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);

gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
-   ((3 & 0xf) << 28) | index);
+   ((3 & 0xf) << 28) | perf_index);

/*
 * Send an invalid index as a vote for the bus bandwidth and let the
@@ -126,7 +139,7 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)
if (ret)
dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret);

-   gmu->freq = gmu->gpu_freqs[index];
+   gmu->freq = gmu->gpu_freqs[perf_index];

/*
 * Eventually we will want to scale the path vote with the frequency but
@@ -135,25 +148,6 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)
icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
 }

-void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
-{
-   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-   struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-   struct a6xx_gmu *gmu = _gpu->gmu;
-   u32 perf_index = 0;
-
-   if (freq == gmu->freq)
-   return;
-
-   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
-   if (freq == gmu->gpu_freqs[perf_index])
-   break;
-
-   gmu->current_perf_index = perf_index;
-
-   __a6xx_gmu_set_freq(gmu, perf_index);
-}
-
 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -708,6 +702,19 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
a6xx_gmu_rpmh_off(gmu);
 }

+static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu 
*gmu)
+{
+   struct dev_pm_opp *gpu_opp;
+   unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index];
+
+   gpu_opp = dev_pm_opp_find_freq_exact(>pdev->dev, gpu_freq, true);
+   if (IS_ERR_OR_NULL(gpu_opp))
+   return;
+
+   a6xx_gmu_set_freq(gpu, gpu_opp);
+   dev_pm_opp_put(gpu_opp);
+}
+
 int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
 {
struct adreno_gpu *adreno_gpu = _gpu->base;
@@ -759,8 +766,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK);
enable_irq(gmu->hfi_irq);

-   /* Set the GPU to the current freq */
-   __a6xx_gmu_set_freq(gmu, gmu->current_perf_index);
+   a6xx_gmu_set_initial_freq(gpu, gmu);

/*
 * "enable" the GX power domain which won't actually do anything but it
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index 7239b8b..03ba60d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -63,7 +63,7 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum 
a6xx_gmu_oob_state state);
 int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
 void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);

-void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq);
+void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp);

[PATCH 5/6] drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth

2020-05-14 Thread Sharat Masetty
This patch replaces the previously used static DDR vote and uses
dev_pm_opp_set_bw() to scale GPU->DDR bandwidth along with scaling
GPU frequency.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 2d8124b..79433d3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -141,11 +141,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct 
dev_pm_opp *opp)

gmu->freq = gmu->gpu_freqs[perf_index];

-   /*
-* Eventually we will want to scale the path vote with the frequency but
-* for now leave it at max so that the performance is nominal.
-*/
-   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+   dev_pm_opp_set_bw(>pdev->dev, opp);
 }

 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
--
2.7.4


[PATCH 2/6] arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp

2020-05-14 Thread Sharat Masetty
Add opp-peak-kBps bindings to the GPU opp table, listing the peak
GPU -> DDR bandwidth requirement for each opp level. This will be
used to scale the DDR bandwidth along with the GPU frequency dynamically.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 0ce9921..89f7767 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1392,36 +1392,43 @@
opp-8 {
opp-hz = /bits/ 64 <8>;
opp-level = 
;
+   opp-peak-kBps = <8532000>;
};

opp-65000 {
opp-hz = /bits/ 64 <65000>;
opp-level = 
;
+   opp-peak-kBps = <7216000>;
};

opp-56500 {
opp-hz = /bits/ 64 <56500>;
opp-level = ;
+   opp-peak-kBps = <5412000>;
};

opp-43000 {
opp-hz = /bits/ 64 <43000>;
opp-level = 
;
+   opp-peak-kBps = <5412000>;
};

opp-35500 {
opp-hz = /bits/ 64 <35500>;
opp-level = ;
+   opp-peak-kBps = <3072000>;
};

opp-26700 {
opp-hz = /bits/ 64 <26700>;
opp-level = 
;
+   opp-peak-kBps = <3072000>;
};

opp-18000 {
opp-hz = /bits/ 64 <18000>;
opp-level = 
;
+   opp-peak-kBps = <1804000>;
};
};
};
--
2.7.4


[PATCH 0/6] Add support for GPU DDR BW scaling

2020-05-14 Thread Sharat Masetty
This is a rework of my previous series [1], this time based on the bindings
from Georgi [2] plus a few fixes, which appear to be incorporated in v8 of
Georgi's series [3]. The work is based on the chromeOS tip.

[1]: https://patchwork.freedesktop.org/series/75291/
[2]: https://lore.kernel.org/patchwork/cover/1230626/
[3]: https://lore.kernel.org/patchwork/cover/1240687/

Sharat Masetty (5):
  arm64: dts: qcom: sc7180: Add interconnect bindings for GPU
  arm64: dts: qcom: sc7180: Add opp-peak-kBps to GPU opp
  drm: msm: a6xx: send opp instead of a frequency
  drm: msm: a6xx: use dev_pm_opp_set_bw to set DDR bandwidth
  dt-bindings: drm/msm/gpu: Document gpu opp table

Sibi Sankar (1):
  OPP: Add and export helper to set bandwidth

 .../devicetree/bindings/display/msm/gpu.txt| 28 +
 arch/arm64/boot/dts/qcom/sc7180.dtsi   |  9 +++
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c  | 68 +++---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h  |  2 +-
 drivers/gpu/drm/msm/msm_gpu.c  |  3 +-
 drivers/gpu/drm/msm/msm_gpu.h  |  3 +-
 drivers/opp/core.c | 43 ++
 include/linux/pm_opp.h |  6 ++
 8 files changed, 125 insertions(+), 37 deletions(-)

--
2.7.4


[PATCH 6/6] dt-bindings: drm/msm/gpu: Document gpu opp table

2020-05-14 Thread Sharat Masetty
Update documentation to list the gpu opp table bindings including the
newly added "opp-peak-kBps" needed for GPU-DDR bandwidth scaling.

Signed-off-by: Sharat Masetty 
---
 .../devicetree/bindings/display/msm/gpu.txt| 28 ++
 1 file changed, 28 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt 
b/Documentation/devicetree/bindings/display/msm/gpu.txt
index 70025cb..48bd4ab 100644
--- a/Documentation/devicetree/bindings/display/msm/gpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/gpu.txt
@@ -79,6 +79,34 @@ Example a6xx (with GMU):

interconnects = <_hlos MASTER_GFX3D _hlos SLAVE_EBI1>;

+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = ;
+   opp-peak-kBps = <5412000>;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   opp-peak-kBps = <3072000>;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = ;
+   opp-peak-kBps = <3072000>;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = ;
+   opp-peak-kBps = <1804000>;
+   };
+   };
+
qcom,gmu = <>;

zap-shader {
--
2.7.4


[PATCH 1/6] arm64: dts: qcom: sc7180: Add interconnect bindings for GPU

2020-05-14 Thread Sharat Masetty
This patch adds the interconnect bindings to the GPU node. This enables
the GPU->DDR path bandwidth voting.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index b46ee78..0ce9921 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1384,6 +1384,8 @@
operating-points-v2 = <_opp_table>;
qcom,gmu = <>;

+   interconnects = <_noc MASTER_GFX3D _virt 
SLAVE_EBI1>;
+
gpu_opp_table: opp-table {
compatible = "operating-points-v2";

--
2.7.4


[PATCH 3/6] OPP: Add and export helper to set bandwidth

2020-05-14 Thread Sharat Masetty
From: Sibi Sankar 

Add and export 'dev_pm_opp_set_bw' to set the bandwidth
levels associated with an OPP for a given frequency.
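
As an illustration, a consumer whose clock is programmed out of band (for
example by firmware, as with the a6xx GMU) could use the helper like this
(apply_bw_for_freq is a hypothetical name, not part of this patch):

#include <linux/pm_opp.h>

static int apply_bw_for_freq(struct device *dev, unsigned long freq)
{
	struct dev_pm_opp *opp;
	int ret;

	/* Pick the lowest OPP at or above the requested frequency */
	opp = dev_pm_opp_find_freq_ceil(dev, &freq);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	/* icc_set_bw() is called on each interconnect path held by the OPP table */
	ret = dev_pm_opp_set_bw(dev, opp);
	dev_pm_opp_put(opp);

	return ret;
}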

Signed-off-by: Sibi Sankar 
Signed-off-by: Sharat Masetty 
---
 drivers/opp/core.c | 43 +++
 include/linux/pm_opp.h |  6 ++
 2 files changed, 49 insertions(+)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index f42b7c4..0f34077 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -828,6 +828,49 @@ static int _set_required_opps(struct device *dev,
 }

 /**
+ * dev_pm_opp_set_bw() - sets bandwidth levels corresponding to an available 
opp
+ * @dev:   device for which we do this operation
+ * @opp:   opp based on which the bandwidth levels are to be configured
+ *
+ * This configures the bandwidth to the levels specified
+ * by the OPP.
+ *
+ * Return: 0 on success or a negative error value.
+ */
+int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp)
+{
+   struct opp_table *opp_table;
+   int ret = -EINVAL;
+   int i;
+
+   if (IS_ERR_OR_NULL(opp) || !opp->available) {
+   dev_err(dev, "%s: Invalid parameters\n", __func__);
+   return -EINVAL;
+   }
+
+   opp_table = _find_opp_table(dev);
+   if (IS_ERR(opp_table)) {
+   dev_err(dev, "%s: device opp table doesn't exist\n", __func__);
+   return PTR_ERR(opp_table);
+   }
+
+   if (opp_table->paths) {
+   for (i = 0; i < opp_table->path_count; i++) {
+   ret = icc_set_bw(opp_table->paths[i],
+opp->bandwidth[i].avg,
+opp->bandwidth[i].peak);
+   if (ret)
+   dev_err(dev, "Failed to set bandwidth[%d]: 
%d\n",
+   i, ret);
+   }
+   }
+
+   dev_pm_opp_put_opp_table(opp_table);
+   return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_set_bw);
+
+/**
  * dev_pm_opp_set_rate() - Configure new OPP based on frequency
  * @dev:device for which we do this operation
  * @target_freq: frequency to achieve
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 76f8c6b..04f7fda 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -156,6 +156,7 @@ struct dev_pm_opp *dev_pm_opp_xlate_opp(struct opp_table 
*src_table,
struct opp_table *dst_table,
struct dev_pm_opp *src_opp);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
+int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask 
*cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask 
*cpumask);
 void dev_pm_opp_remove_table(struct device *dev);
@@ -354,6 +355,11 @@ static inline int dev_pm_opp_set_rate(struct device *dev, 
unsigned long target_f
return -ENOTSUPP;
 }

+static inline int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp)
+{
+   return -ENOTSUPP;
+}
+
 static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const 
struct cpumask *cpumask)
 {
return -ENOTSUPP;
--
2.7.4


[PATCH 1/2] arm64: dts: qcom: sc7180: Add A618 gpu dt blob

2020-05-01 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
* Remove GCC_DDRSS_GPU_AXI_CLK clock reference from gpu smmu node.

 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 4216b57..de9a054 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1373,6 +1373,108 @@
};
};

+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x0500 0 0x4>, <0 0x0509e000 0 0x1000>,
+   <0 0x05061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+   interrupts = ;
+   iommus = <_smmu 0>;
+   operating-points-v2 = <_opp_table>;
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x0504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>;
+   clock-names = "bus", "iface";
+
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618.0", "qcom,adreno-gmu";
+   reg = <0 0x0506a000 0 0x31000>, <0 0x0b29 0 
0x1>,
+   <0 0x0b49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DDRSS_GPU_AXI_CLK>,
+  < GCC_GPU_MEMNOC_GFX_CLK>;
+   clock-names = "gmu", "cxo", "axi", "memnoc";
+   power-domains = < CX_GDSC>, < GX_GDSC>;
+   power-domain-names = "cx", "gx

[PATCH 2/2] dt-bindings: arm-smmu: Add sc7180 compatible string

2020-05-01 Thread Sharat Masetty
This patch simply adds a new compatible string for the SC7180 platform.

Signed-off-by: Sharat Masetty 
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..986098b 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
   - enum:
   - qcom,msm8996-smmu-v2
   - qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
   - qcom,sdm845-smmu-v2
   - const: qcom,smmu-v2
 
-- 
1.9.1


Re: [Freedreno] [PATCH v2] dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock

2020-05-01 Thread Sharat Masetty



On 4/30/2020 11:51 PM, Doug Anderson wrote:

Hi,

On Thu, Apr 30, 2020 at 11:12 AM Jordan Crouse  wrote:

On Thu, Apr 30, 2020 at 09:29:47AM +0530, Sharat Masetty wrote:

This patch adds a new compatible string for sc7180 and also an
additional clock listing needed to power the TBUs and the TCU.

Signed-off-by: Sharat Masetty 
---
v2: Addressed review comments from Doug

  Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 8 
  1 file changed, 8 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..ba5dba4 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
- enum:
- qcom,msm8996-smmu-v2
- qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
- qcom,sdm845-smmu-v2
- const: qcom,smmu-v2

@@ -113,16 +114,23 @@ properties:
present in such cases.

clock-names:
+minItems: 2
+maxItems: 3
  items:
- const: bus
- const: iface
+  - const: mem_iface

Hi Sharat -

I think there was a bit of confusion due to renaming between downstream and
upstream.  Currently for the sdm845 and friends we have:

   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
  < GCC_GPU_CFG_AHB_CLK>;
   clock-names = "bus", "iface";

Confusingly these same clocks downstream are "mem_iface_clk" and "iface_clk"
respectively.

It looks like you are trying to add GCC_DDRSS_GPU_AXI_CLK as "mem_iface" which
was formerly "mem_clk" downstream. I'm not sure if the naming change is
intentional or you were trying to make upstream and downstream match and didn't
realize that they were renamed.

I'm not sure if we need DDRSS_GPU_AXI_CLK or not. Empirically it works without
it for sdm845 (I don't have a sc7180 to test) but we should probably loop back
with either the clock team or the hardware designers to be sure there isn't a
corner case that is missing. I agree with Doug that it's always best if we don't
need to add a clock.


Thanks Jordan and Doug for the updates. My intention was to add the
third clock as listed downstream, but as you said the naming is a bit
misleading. From the GCC_DDRSS_GPU_AXI_CLK clock description, this clock is
needed for GPU-to-DDR access, and all transactions to the DDR from the GPU
go through the SMMU. It is listed in the SMMU dt node because it's needed
by the SMMU to perform pagetable walks.


I think we may be fine not listing this clock in the SMMU node because
the same clock is listed in both the GMU and the GPU nodes.



I can confirm that on sc7180 the GPU seems to come up just fine
without the clock being specified in the iommu node.  Definitely would
be good to know what's broken and if nothing is broken maybe we can
change this patch to just add the sc7180 compatible string and drop
the clock.  I do note that the GMU already has a reference to the same
"GCC_DDRSS_GPU_AXI_CLK" clock.

-Doug


[PATCH v2] dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock

2020-04-29 Thread Sharat Masetty
This patch adds a new compatible string for sc7180 and also an
additional clock listing needed to power the TBUs and the TCU.

Signed-off-by: Sharat Masetty 
---
v2: Addressed review comments from Doug

 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 8 
 1 file changed, 8 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..ba5dba4 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
   - enum:
   - qcom,msm8996-smmu-v2
   - qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
   - qcom,sdm845-smmu-v2
   - const: qcom,smmu-v2

@@ -113,16 +114,23 @@ properties:
   present in such cases.

   clock-names:
+minItems: 2
+maxItems: 3
 items:
   - const: bus
   - const: iface
+  - const: mem_iface

   clocks:
+minItems: 2
+maxItems: 3
 items:
   - description: bus clock required for downstream bus access and for the
   smmu ptw
   - description: interface clock required to access smmu's registers
   through the TCU's programming interface.
+  - description: clock required for the inner working of SMMU TBUs and the
+  TCU like the pagetable walks and the TLB flushes.

   power-domains:
 maxItems: 1
--
1.9.1


Re: [PATCH] dt-bindings: arm-smmu: Add a new compatible string and a clock

2020-04-29 Thread Sharat Masetty



On 4/29/2020 3:57 AM, Doug Anderson wrote:

Hi,

On Tue, Apr 28, 2020 at 4:39 AM Sharat Masetty  wrote:

This patch adds a new compatible string for sc7180 and also an
additional clock listing needed to power the TBUs and the TCU.

Signed-off-by: Sharat Masetty 
---
  Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 7 +++
  1 file changed, 7 insertions(+)

nit: mention sc7180 in subject, like:

dt-bindings: arm-smmu: Add sc7180 compatible string and mem_iface clock



diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..15946ac 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
- enum:
- qcom,msm8996-smmu-v2
- qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
- qcom,sdm845-smmu-v2
- const: qcom,smmu-v2

@@ -113,16 +114,22 @@ properties:
present in such cases.

clock-names:
+minItems: 2
+maxItems: 3
  items:
- const: bus
- const: iface
+  - const: mem_iface_clk

People usually frown on clock-names ending in "_clk".  Just name it "mem_iface".



clocks:
+minItems: 2
+maxItems: 3
  items:
- description: bus clock required for downstream bus access and for the
smmu ptw
- description: interface clock required to access smmu's registers
through the TCU's programming interface.
+  - description: clock required for the SMMU TBUs and the TCU

Is this clock only needed for sc7180, or would it be useful if we
enabled certain features on existing devices?  Please document exactly
when someone would provide this clock and when they'd leave it off.

...also: maybe it's obvious to those that understand IOMMUs in depth,
but to me I have no idea what your description means and why it's
different from the other two clocks.  Any way you could punch up your
description a little bit?

Looking at sdm845 I see that this clock seems to exist but wasn't
listed in the IOMMU device tree node.  Is that a mistake on sdm845?
...or is it just fine because the GPU holds the clock?  Is there a
reason the sdm845 solution and the sc7180 solution shouldn't be the
same (AKA we should either add this clock to the sdm845 device tree
file or remove it from sc7180)?


I went and checked the downstream SDM845 device tree for the GPU SMMU and I
do see this clock listed there. I am no expert in SMMU either, but my
understanding is that this clock is needed for the core workings of the SMMU,
like the pagetable walks and TLB invalidations, whereas the other two
clocks are required to access the SMMU register space from the host. My
proposal is to add this clock to SDM845 as well, as a follow-up effort, so
that we can remove the minItems/maxItems properties which I do not like.


@Jordan, do you remember why this clock was added to SDM845?


Thanks!

-Doug



[PATCH] dt-bindings: arm-smmu: Add a new compatible string and a clock

2020-04-28 Thread Sharat Masetty
This patch adds a new compatible string for sc7180 and also an
additional clock listing needed to power the TBUs and the TCU.

Signed-off-by: Sharat Masetty 
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..15946ac 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
   - enum:
   - qcom,msm8996-smmu-v2
   - qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
   - qcom,sdm845-smmu-v2
   - const: qcom,smmu-v2
 
@@ -113,16 +114,22 @@ properties:
   present in such cases.
 
   clock-names:
+minItems: 2
+maxItems: 3
 items:
   - const: bus
   - const: iface
+  - const: mem_iface_clk
 
   clocks:
+minItems: 2
+maxItems: 3
 items:
   - description: bus clock required for downstream bus access and for the
   smmu ptw
   - description: interface clock required to access smmu's registers
   through the TCU's programming interface.
+  - description: clock required for the SMMU TBUs and the TCU
 
   power-domains:
 maxItems: 1
-- 
1.9.1


Re: [PATCH 3/5] drm: msm: scale DDR BW along with GPU frequency

2020-04-01 Thread Sharat Masetty


On 3/31/2020 10:56 PM, Jordan Crouse wrote:

On Tue, Mar 31, 2020 at 01:25:51PM +0530, Sharat Masetty wrote:

This patch adds support to parse the OPP tables attached to the GPU device:
the main opp table and the DDR bandwidth opp table. Additionally, vote
for the GPU->DDR bandwidth when setting the GPU frequency by querying
the DDR BW opp linked to the GPU opp.

Signed-off-by: Sharat Masetty 
---
  drivers/gpu/drm/msm/adreno/a6xx_gmu.c   | 41 ++
  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 44 +
  drivers/gpu/drm/msm/msm_gpu.h   |  9 +++
  3 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 748cd37..489d9b6 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -100,6 +100,40 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
  }

+void a6xx_gmu_set_icc_vote(struct msm_gpu *gpu, unsigned long gpu_freq)
+{
+   struct dev_pm_opp *gpu_opp, *ddr_opp;
+   struct opp_table **tables = gpu->opp_tables;
+   unsigned long peak_bw;
+
+   if (!gpu->opp_tables[GPU_DDR_OPP_TABLE_INDEX])
+   goto done;
+
+   gpu_opp = dev_pm_opp_find_freq_exact(>pdev->dev, gpu_freq, true);
+   if (IS_ERR_OR_NULL(gpu_opp))
+   goto done;
+
+   ddr_opp = dev_pm_opp_xlate_required_opp(tables[GPU_OPP_TABLE_INDEX],
+   tables[GPU_DDR_OPP_TABLE_INDEX],
+   gpu_opp);
+   dev_pm_opp_put(gpu_opp);
+
+   if (IS_ERR_OR_NULL(ddr_opp))
+   goto done;

I think that the final approach is still up in the air but either way we're
going to pull the bandwidth from an OPP, it's just a question of which OPP.


+   peak_bw = dev_pm_opp_get_bw(ddr_opp, NULL);
+   dev_pm_opp_put(ddr_opp);
+
+   icc_set_bw(gpu->icc_path, 0, peak_bw);
+   return;
+done:
+   /*
+* If there is a problem, for now leave it at max so that the
+* performance is nominal.
+*/
+   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+}
+
  static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
  {
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
@@ -128,11 +162,8 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)

gmu->freq = gmu->gpu_freqs[index];

-   /*
-* Eventually we will want to scale the path vote with the frequency but
-* for now leave it at max so that the performance is nominal.
-*/
-   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+   if (gpu->icc_path)
+   a6xx_gmu_set_icc_vote(gpu, gmu->freq);

This function is annoying because we call it from two different spots, but it
feels wasteful that devfreq gives us an OPP pointer and we go out of our way to
not use it only to search for it again in the set_icc_vote function. I think
maybe we should pass the OPP through from msm_gpu.c.  We could have a helper
function to pull the initial opp in a6xx_gmu_resume to make it clean.


Yes Jordan, it makes sense. I did think about this too, but maybe I
was a bit too lazy to change the existing plumbing :)


I will take care of this in the next iteration.




  }

  void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 2d13694..bbbcc7a 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -882,7 +882,7 @@ static int adreno_get_pwrlevels(struct device *dev,
  {
unsigned long freq = ULONG_MAX;
struct dev_pm_opp *opp;
-   int ret;
+   int ret, i;

gpu->fast_rate = 0;

@@ -890,9 +890,29 @@ static int adreno_get_pwrlevels(struct device *dev,
if (!of_find_property(dev->of_node, "operating-points-v2", NULL))
ret = adreno_get_legacy_pwrlevels(dev);
else {
-   ret = dev_pm_opp_of_add_table(dev);
-   if (ret)
-   DRM_DEV_ERROR(dev, "Unable to set the OPP table\n");
+   int count = of_count_phandle_with_args(dev->of_node,
+   "operating-points-v2", NULL);
+
+   count = min(count, GPU_DDR_OPP_TABLE_INDEX + 1);
+   count = max(count, 1);
+
+   for (i = 0; i < count; i++) {
+   ret = dev_pm_opp_of_add_table_indexed(dev, i);
+   if (ret) {
+   DRM_DEV_ERROR(dev, "Add OPP table %d: failed 
%d\n",
+   i, ret);
+  

[PATCH 3/5] drm: msm: scale DDR BW along with GPU frequency

2020-03-31 Thread Sharat Masetty
This patch adds support to parse the OPP tables attached to the GPU device:
the main opp table and the DDR bandwidth opp table. Additionally, vote
for the GPU->DDR bandwidth when setting the GPU frequency by querying
the DDR BW opp linked to the GPU opp.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c   | 41 ++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 44 +
 drivers/gpu/drm/msm/msm_gpu.h   |  9 +++
 3 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 748cd37..489d9b6 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -100,6 +100,40 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
 }

+void a6xx_gmu_set_icc_vote(struct msm_gpu *gpu, unsigned long gpu_freq)
+{
+   struct dev_pm_opp *gpu_opp, *ddr_opp;
+   struct opp_table **tables = gpu->opp_tables;
+   unsigned long peak_bw;
+
+   if (!gpu->opp_tables[GPU_DDR_OPP_TABLE_INDEX])
+   goto done;
+
+   gpu_opp = dev_pm_opp_find_freq_exact(>pdev->dev, gpu_freq, true);
+   if (IS_ERR_OR_NULL(gpu_opp))
+   goto done;
+
+   ddr_opp = dev_pm_opp_xlate_required_opp(tables[GPU_OPP_TABLE_INDEX],
+   tables[GPU_DDR_OPP_TABLE_INDEX],
+   gpu_opp);
+   dev_pm_opp_put(gpu_opp);
+
+   if (IS_ERR_OR_NULL(ddr_opp))
+   goto done;
+
+   peak_bw = dev_pm_opp_get_bw(ddr_opp, NULL);
+   dev_pm_opp_put(ddr_opp);
+
+   icc_set_bw(gpu->icc_path, 0, peak_bw);
+   return;
+done:
+   /*
+* If there is a problem, for now leave it at max so that the
+* performance is nominal.
+*/
+   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+}
+
 static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
 {
struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
@@ -128,11 +162,8 @@ static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int 
index)

gmu->freq = gmu->gpu_freqs[index];

-   /*
-* Eventually we will want to scale the path vote with the frequency but
-* for now leave it at max so that the performance is nominal.
-*/
-   icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+   if (gpu->icc_path)
+   a6xx_gmu_set_icc_vote(gpu, gmu->freq);
 }

 void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 2d13694..bbbcc7a 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -882,7 +882,7 @@ static int adreno_get_pwrlevels(struct device *dev,
 {
unsigned long freq = ULONG_MAX;
struct dev_pm_opp *opp;
-   int ret;
+   int ret, i;

gpu->fast_rate = 0;

@@ -890,9 +890,29 @@ static int adreno_get_pwrlevels(struct device *dev,
if (!of_find_property(dev->of_node, "operating-points-v2", NULL))
ret = adreno_get_legacy_pwrlevels(dev);
else {
-   ret = dev_pm_opp_of_add_table(dev);
-   if (ret)
-   DRM_DEV_ERROR(dev, "Unable to set the OPP table\n");
+   int count = of_count_phandle_with_args(dev->of_node,
+   "operating-points-v2", NULL);
+
+   count = min(count, GPU_DDR_OPP_TABLE_INDEX + 1);
+   count = max(count, 1);
+
+   for (i = 0; i < count; i++) {
+   ret = dev_pm_opp_of_add_table_indexed(dev, i);
+   if (ret) {
+   DRM_DEV_ERROR(dev, "Add OPP table %d: failed 
%d\n",
+   i, ret);
+   goto err;
+   }
+
+   gpu->opp_tables[i] =
+   dev_pm_opp_get_opp_table_indexed(dev, i);
+   if (!gpu->opp_tables[i]) {
+   DRM_DEV_ERROR(dev, "Get OPP table failed index 
%d\n",
+   i);
+   ret = -EINVAL;
+   goto err;
+   }
+   }
}

if (!ret) {
@@ -919,12 +939,24 @@ static int adreno_get_pwrlevels(struct device *dev,
gpu->icc_path = NULL;

return 0;
+err:
+   for (; i >= 0; i--) {
+   if (gpu->opp_tables[i]) {
+   dev_pm_opp_put_opp_table(gpu->opp_tables[i]);
+   gpu->opp_tables[i] = NULL;
+   

[PATCH 4/5] drm: msm: a6xx: Fix off by one error when setting GPU freq

2020-03-31 Thread Sharat Masetty
This patch fixes an off-by-one error in the for loop, allowing the search
to cover the full list of possible GPU power levels.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 489d9b6..81b8559 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -176,7 +176,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long 
freq)
if (freq == gmu->freq)
return;

-   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
+   for (perf_index = 0; perf_index < gmu->nr_gpu_freqs; perf_index++)
if (freq == gmu->gpu_freqs[perf_index])
break;

--
2.7.4


[PATCH 5/5] dt-bindings: drm/msm/gpu: Document OPP phandle list for the GPU

2020-03-31 Thread Sharat Masetty
Update the documentation to list the multiple optional GPU and DDR OPP
tables needed to enable DDR scaling.

Signed-off-by: Sharat Masetty 
---
 .../devicetree/bindings/display/msm/gpu.txt| 63 +-
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt 
b/Documentation/devicetree/bindings/display/msm/gpu.txt
index 70025cb..ff3ae1b 100644
--- a/Documentation/devicetree/bindings/display/msm/gpu.txt
+++ b/Documentation/devicetree/bindings/display/msm/gpu.txt
@@ -21,7 +21,10 @@ Required properties:
   following devices should not list clocks:
- qcom,adreno-630.2
 - iommus: optional phandle to an adreno iommu instance
-- operating-points-v2: optional phandle to the OPP operating points
+- operating-points-v2: optional phandles to the OPP tables, one for the GPU
+  OPPs and the other for the GPU->DDR bandwidth OPPs. Note that if multiple
+  OPP tables are specified, the GPU OPP table (considered the primary table)
+  should be the first in the phandle list.
 - interconnects: optional phandle to an interconnect provider.  See
   ../interconnect/interconnect.txt for details.
 - qcom,gmu: For GMU attached devices a phandle to the GMU device that will
@@ -75,7 +78,7 @@ Example a6xx (with GMU):

iommus = <_smmu 0>;

-   operating-points-v2 = <_opp_table>;
+   operating-points-v2 = <_opp_table>, <_ddr_bw_opp_table>;

interconnects = <_hlos MASTER_GFX3D _hlos SLAVE_EBI1>;

@@ -85,5 +88,61 @@ Example a6xx (with GMU):
memory-region = <_shader_region>;
firmware-name = "qcom/LENOVO/81JL/qcdxkmsuc850.mbn"
};
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = ;
+   required-opps = <_ddr_bw_opp6>;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   required-opps = <_ddr_bw_opp4>;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = ;
+   required-opps = <_ddr_bw_opp4>;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = ;
+   required-opps = <_ddr_bw_opp2>;
+   };
+   };
+
+   gpu_ddr_bw_opp_table: gpu-ddr-bw-opp-table {
+   compatible = "operating-points-v2";
+
+   gpu_ddr_bw_opp1: opp-3  {
+   opp-peak-kBps =/bits/ 32 <120>;
+   };
+
+   gpu_ddr_bw_opp2: opp-45100  {
+   opp-peak-kBps =/bits/ 32 <1804000>;
+   };
+
+   gpu_ddr_bw_opp3: opp-54700  {
+   opp-peak-kBps =/bits/ 32 <2188000>;
+   };
+
+   gpu_ddr_bw_opp4: opp-76800  {
+   opp-peak-kBps =/bits/ 32 <3072000>;
+   };
+
+   gpu_ddr_bw_opp5: opp-101700  {
+   opp-peak-kBps =/bits/ 32 <4068000>;
+   };
+
+   gpu_ddr_bw_opp6: opp-135300  {
+   opp-peak-kBps =/bits/ 32 <5412000>;
+   };
+   };
};
 };
--
2.7.4


[PATCH 1/5] arm64: dts: qcom: sc7180: Add interconnect bindings for GPU

2020-03-31 Thread Sharat Masetty
This patch adds the interconnect bindings to the GPU node. This enables
the GPU->DDR path bandwidth voting.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 1097e8b..51630dd 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1718,6 +1718,8 @@
operating-points-v2 = <_opp_table>;
qcom,gmu = <>;

+   interconnects = <_noc MASTER_GFX3D _virt 
SLAVE_EBI1>;
+
gpu_opp_table: opp-table {
compatible = "operating-points-v2";

--
2.7.4


[PATCH 2/5] arm64: dts: qcom: sc7180: Add GPU DDR BW opp table

2020-03-31 Thread Sharat Masetty
This patch adds a new OPP table listing the GPU DDR bandwidth OPPs. It also
adds a required-opps property to the GPU's main OPP table which holds a
phandle to a bandwidth OPP in the new table. This links each GPU power
level OPP to a DDR bandwidth OPP and helps with scaling DDR along with
GPU frequency.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 50 +++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index 51630dd..74b023b 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1715,7 +1715,8 @@
reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
interrupts = ;
iommus = <_smmu 0>;
-   operating-points-v2 = <_opp_table>;
+   operating-points-v2 = <_opp_table>,
+   <_ddr_bw_opp_table>;
qcom,gmu = <>;

interconnects = <_noc MASTER_GFX3D _virt 
SLAVE_EBI1>;
@@ -1726,40 +1727,87 @@
opp-8 {
opp-hz = /bits/ 64 <8>;
opp-level = 
;
+   required-opps = <_ddr_bw_opp9>;
};

opp-65000 {
opp-hz = /bits/ 64 <65000>;
opp-level = 
;
+   required-opps = <_ddr_bw_opp8>;
};

opp-56500 {
opp-hz = /bits/ 64 <56500>;
opp-level = ;
+   required-opps = <_ddr_bw_opp6>;
};

opp-43000 {
opp-hz = /bits/ 64 <43000>;
opp-level = 
;
+   required-opps = <_ddr_bw_opp6>;
};

opp-35500 {
opp-hz = /bits/ 64 <35500>;
opp-level = ;
+   required-opps = <_ddr_bw_opp4>;
};

opp-26700 {
opp-hz = /bits/ 64 <26700>;
opp-level = 
;
+   required-opps = <_ddr_bw_opp4>;
};

opp-18000 {
opp-hz = /bits/ 64 <18000>;
opp-level = 
;
+   required-opps = <_ddr_bw_opp2>;
};
};
};

+   gpu_ddr_bw_opp_table: gpu-ddr-bw-opp-table {
+   compatible = "operating-points-v2";
+
+   gpu_ddr_bw_opp1: opp-3  {
+   opp-peak-kBps =/bits/ 32 <120>;
+   };
+
+   gpu_ddr_bw_opp2: opp-45100  {
+   opp-peak-kBps =/bits/ 32 <1804000>;
+   };
+
+   gpu_ddr_bw_opp3: opp-54700  {
+   opp-peak-kBps =/bits/ 32 <2188000>;
+   };
+
+   gpu_ddr_bw_opp4: opp-76800  {
+   opp-peak-kBps =/bits/ 32 <3072000>;
+   };
+
+   gpu_ddr_bw_opp5: opp-101700  {
+   opp-peak-kBps =/bits/ 32 <4068000>;
+   };
+
+   gpu_ddr_bw_opp6: opp-135300  {
+   opp-peak-kBps =/bits/ 32 <5412000>;
+   };
+
+   gpu_ddr_bw_opp7: opp-155500  {
+   opp-peak-kBps =/bits/ 32 <622>;
+   };
+
+   gpu_ddr_bw_opp8: opp-180400  {
+   opp-peak-kBps =/bits/ 32 <7216000>;
+   };
+
+   gpu_ddr_bw_opp9: opp-213300  {
+   opp-peak-kBps =/bits/ 32 <8532000>;
+  

[PATCH 0/5] Add support for GPU DDR BW scaling

2020-03-31 Thread Sharat Masetty
This series adds support for GPU DDR bandwidth scaling and is based on the
bindings from Saravana [1]. This work builds on Sibi's work for the CPU side [2],
which also lists all the dependencies needed to get this series working.
My workspace is based on a chrome tag [3]. Although the bindings add support
for both peak and average bandwidth votes, I have only added support for peak
bandwidth votes.

[1]: https://patchwork.kernel.org/cover/11277199/
[2]: https://patchwork.kernel.org/cover/11353185/ (this lists further 
dependencies)
[3]: 
https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2097039/3
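
For readers skimming the series, the driver-side mechanism (patch 3/5) boils
down to translating the selected GPU OPP into its required DDR bandwidth OPP
and casting a peak-only interconnect vote. A condensed sketch of that logic is
below; it is not a verbatim hunk from the series (error handling is trimmed,
names follow the patches):

#include <linux/interconnect.h>
#include <linux/pm_opp.h>

/* Derive and cast a peak-only DDR bandwidth vote for the current GPU freq */
static void gpu_set_ddr_vote(struct msm_gpu *gpu, unsigned long gpu_freq)
{
	struct dev_pm_opp *gpu_opp, *ddr_opp;
	unsigned long peak_bw;

	gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true);
	if (IS_ERR_OR_NULL(gpu_opp))
		return;

	/* Follow the required-opps link from the GPU table to the DDR table */
	ddr_opp = dev_pm_opp_xlate_required_opp(gpu->opp_tables[GPU_OPP_TABLE_INDEX],
			gpu->opp_tables[GPU_DDR_OPP_TABLE_INDEX], gpu_opp);
	dev_pm_opp_put(gpu_opp);
	if (IS_ERR_OR_NULL(ddr_opp))
		return;

	peak_bw = dev_pm_opp_get_bw(ddr_opp, NULL);
	dev_pm_opp_put(ddr_opp);

	/* Average bandwidth is left at 0; only the peak vote is supported */
	icc_set_bw(gpu->icc_path, 0, peak_bw);
}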

Sharat Masetty (5):
  arm64: dts: qcom: sc7180: Add interconnect bindings for GPU
  arm64: dts: qcom: sc7180: Add GPU DDR BW opp table
  drm: msm: scale DDR BW along with GPU frequency
  drm: msm: a6xx: Fix off by one error when setting GPU freq
  dt-bindings: drm/msm/gpu: Document OPP phandle list for the GPU

 .../devicetree/bindings/display/msm/gpu.txt| 63 +-
 arch/arm64/boot/dts/qcom/sc7180.dtsi   | 52 +-
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c  | 43 ---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c| 44 +--
 drivers/gpu/drm/msm/msm_gpu.h  |  9 
 5 files changed, 197 insertions(+), 14 deletions(-)

--
2.7.4


[PATCH] drm: msm: a6x: Disable interrupts before recovery

2020-03-02 Thread Sharat Masetty
This patch disables interrupts in the GPU RBBM hang detect fault handler
before going to recovery.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index dc8ec2c..4dd0f62 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -676,6 +676,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
 
+   /* Disable interrupts before going for a recovery */
+   gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);
+
/* Turn off the hangcheck timer to keep it from bothering us */
del_timer(>hangcheck_timer);
 
-- 
1.9.1


Re: [PATCH] dt-bindings: arm-smmu: update the list of clocks

2020-03-01 Thread Sharat Masetty


On 2/26/2020 8:03 PM, Rob Herring wrote:

On Wed, Feb 26, 2020 at 5:17 AM Sharat Masetty  wrote:


On 2/21/2020 2:05 AM, Rob Herring wrote:

On Thu, 20 Feb 2020 13:42:22 +0530, Sharat Masetty wrote:

This patch adds a clock definition needed for powering on the GPU TBUs
and the GPU TCU.

Signed-off-by: Sharat Masetty 
---
   Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 3 +++
   1 file changed, 3 insertions(+)


My bot found errors running 'make dt_binding_check' on your patch:

Documentation/devicetree/bindings/display/simple-framebuffer.example.dts:21.16-37.11:
 Warning (chosen_node_is_root): /example-0/chosen: chosen node must be at root 
node
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/iommu/arm,smmu.example.dt.yaml:
 iommu@d0: clock-names: ['bus', 'iface'] is too short
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/iommu/arm,smmu.example.dt.yaml:
 iommu@d0: clocks: [[4294967295, 123], [4294967295, 124]] is too short

See https://patchwork.ozlabs.org/patch/1241297
Please check and re-submit.

Hi Rob, These issues seem to be from the original code and not related
to my patch. Are these going to be blocking errors?

There are no errors in this binding in mainline. You've added a 3rd
clock when all the existing users have exactly 2 clocks.


Rob,

Adding something like the following seems to solve the bot errors, 
but I am not certain this is the right way to address the issue. Can 
you please comment?


  clock-names:
+    minItems: 2
+    maxItems: 3
     items:
       - const: bus
       - const: iface
+      - const: mem_iface_clk

  clocks:
+    minItems: 2
+    maxItems: 3
     items:
       - description: bus clock required for downstream bus access and for the
           smmu ptw
       - description: interface clock required to access smmu's registers
           through the TCU's programming interface.
+      - description: core clock required for the GPU SMMU TBUs and the GPU TCU.




Rob


Re: [PATCH] dt-bindings: arm-smmu: update the list of clocks

2020-02-26 Thread Sharat Masetty



On 2/21/2020 2:05 AM, Rob Herring wrote:

On Thu, 20 Feb 2020 13:42:22 +0530, Sharat Masetty wrote:

This patch adds a clock definition needed for powering on the GPU TBUs
and the GPU TCU.

Signed-off-by: Sharat Masetty 
---
  Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 3 +++
  1 file changed, 3 insertions(+)


My bot found errors running 'make dt_binding_check' on your patch:

Documentation/devicetree/bindings/display/simple-framebuffer.example.dts:21.16-37.11:
 Warning (chosen_node_is_root): /example-0/chosen: chosen node must be at root 
node
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/iommu/arm,smmu.example.dt.yaml:
 iommu@d0: clock-names: ['bus', 'iface'] is too short
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/iommu/arm,smmu.example.dt.yaml:
 iommu@d0: clocks: [[4294967295, 123], [4294967295, 124]] is too short

See https://patchwork.ozlabs.org/patch/1241297
Please check and re-submit.
Hi Rob, These issues seem to be from the original code and not related 
to my patch. Are these going to be blocking errors?



Re: [PATCH v6] arm64: dts: qcom: sc7180: Add A618 gpu dt blob

2020-02-20 Thread Sharat Masetty



On 2/11/2020 2:51 AM, Doug Anderson wrote:

Hi,

On Sun, Feb 9, 2020 at 11:41 PM Sharat Masetty  wrote:

This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 

In v4 I added my tags [1].  Please keep them for future patches unless
something major changes.  AKA:

Reviewed-by: Douglas Anderson 
Tested-by: Douglas Anderson 



---
  arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
  1 file changed, 102 insertions(+)

Just to summarize where we are:

* As per my notes in v3 and v4 [1], this can't land until
"mem_iface_clk" is in the bindings.  Please post a patch for this and
reply with a link here so others can follow the discussion.

I posted a patch for this @https://patchwork.freedesktop.org/patch/354130/


* This also can't land until the gpucc bindings change from Taniya
recently [2] lands.

...so we're in limbo waiting for the bindings to be resolved, but
otherwise this patch looks good.


[1] 
https://lore.kernel.org/r/CAD=FV=ueq0moxudrszrcj8g6jb0elf1ttn+mn7t6d2tpcmu...@mail.gmail.com
[2] 
https://lore.kernel.org/r/1581307266-26989-1-git-send-email-t...@codeaurora.org


-Doug



[PATCH] dt-bindings: arm-smmu: update the list of clocks

2020-02-20 Thread Sharat Masetty
This patch adds a clock definition needed for powering on the GPU TBUs
and the GPU TCU.

Signed-off-by: Sharat Masetty 
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml 
b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6515dbe..235c0df 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
   - enum:
   - qcom,msm8996-smmu-v2
   - qcom,msm8998-smmu-v2
+  - qcom,sc7180-smmu-v2
   - qcom,sdm845-smmu-v2
   - const: qcom,smmu-v2

@@ -116,6 +117,7 @@ properties:
 items:
   - const: bus
   - const: iface
+  - const: mem_iface_clk

   clocks:
 items:
@@ -123,6 +125,7 @@ properties:
   smmu ptw
   - description: interface clock required to access smmu's registers
   through the TCU's programming interface.
+  - description: core clock required for the GPU SMMU TBUs and the GPU TCU

   power-domains:
 maxItems: 1
--
1.9.1


[PATCH] Update arm,smmu.yaml bindings doc

2020-02-20 Thread Sharat Masetty
Patch [1] adds a "mem_iface_clock" in the clocks list for smmu device.
This patch updates the yaml doc file for smmu by adding the definition
for this new clock.

1: https://patchwork.freedesktop.org/patch/352718/

Sharat Masetty (1):
  dt-bindings: arm-smmu: update clocks and bindings for sc7180 SoC

 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 3 +++
 1 file changed, 3 insertions(+)

--
1.9.1


[PATCH v6] sc7180: Add A618 GPU bindings

2020-02-09 Thread Sharat Masetty
I used this branch qcom/arm64-for-5.6-to-be-rebased as suggested by Matthias.
This patch depends on clock patches which have not yet landed, so
please apply the following series and patches in this order:

a) All patches from 
https://patchwork.kernel.org/project/linux-clk/list/?series=203517=%2a=both
b) Patches 1 and 2 from 
https://patchwork.kernel.org/project/linux-clk/list/?series=203527=both=%2a
c) All patches from 
https://patchwork.kernel.org/project/linux-clk/list/?series=221739=both=%2a
d) 
https://lore.kernel.org/linux-arm-msm/20200124144154.v2.10.I1a4b93fb005791e29a9dcf288fc8bd459a555a59%40changeid/raw
e) https://patchwork.kernel.org/project/linux-clk/list/?series=238893
f) This patch "arm64: dts: qcom: sc7180: Add A618 gpu dt blob"

v3: Addressed review comments from previous submits. Also removed the
interconnect bindings from this patch and I will submit as a new patch with its
dependencies listed. Also I will be sending a new patch for updating the
bindings documentation.

v4: Add GX_GDSC power domain binding for GMU

v5: Change to a dummy GX_GDSC binding for faster landing

v6: Rebased changes on top of Taniyas lastet post((e) in the list above) and
adding back the GX_GDSC binding.

Sharat Masetty (1):
  arm64: dts: qcom: sc7180: Add A618 gpu dt blob

 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

--
1.9.1


[PATCH v6] arm64: dts: qcom: sc7180: Add A618 gpu dt blob

2020-02-09 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index f3fcc5c..63fff15 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1043,6 +1043,108 @@
};
};
 
+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x0500 0 0x4>, <0 0x0509e000 0 0x1000>,
+   <0 0x05061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+   interrupts = ;
+   iommus = <_smmu 0>;
+   operating-points-v2 = <_opp_table>;
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x0504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>,
+   < GCC_DDRSS_GPU_AXI_CLK>;
+
+   clock-names = "bus", "iface", "mem_iface_clk";
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618.0", "qcom,adreno-gmu";
+   reg = <0 0x0506a000 0 0x31000>, <0 0x0b29 0 
0x1>,
+   <0 0x0b49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DDRSS_GPU_AXI_CLK>,
+  < GCC_GPU_MEMNOC_GFX_CLK>;
+   clock-names = "gmu", "cxo", "axi", "memnoc";
+   power-domains = < CX_GDSC>, < GX_GDSC>;
+   power-domain-names = "cx

[PATCH v5] arm64: dts: qcom: sc7180: Add A618 gpu dt blob

2020-02-08 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index f3fcc5c..5cdcbce 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1043,6 +1043,108 @@
};
};
 
+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x0500 0 0x4>, <0 0x0509e000 0 0x1000>,
+   <0 0x05061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+   interrupts = ;
+   iommus = <_smmu 0>;
+   operating-points-v2 = <_opp_table>;
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x0504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>,
+   < GCC_DDRSS_GPU_AXI_CLK>;
+
+   clock-names = "bus", "iface", "mem_iface_clk";
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618.0", "qcom,adreno-gmu";
+   reg = <0 0x0506a000 0 0x31000>, <0 0x0b29 0 
0x1>,
+   <0 0x0b49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DDRSS_GPU_AXI_CLK>,
+  < GCC_GPU_MEMNOC_GFX_CLK>;
+   clock-names = "gmu", "cxo", "axi", "memnoc";
+   power-domains = < CX_GDSC>, <0>;
+   power-domain-names = "cx

[PATCH v5] sc7180: Add A618 GPU bindings

2020-02-08 Thread Sharat Masetty
I used this branch qcom/arm64-for-5.6-to-be-rebased as suggested by Matthias.
This patch depends on clock patches which have not yet landed, so
please apply the following series and patches in this order:

a) All patches from 
https://patchwork.kernel.org/project/linux-clk/list/?series=203517=%2a=both
b) Patches 1 and 2 from 
https://patchwork.kernel.org/project/linux-clk/list/?series=203527=both=%2a
c) All patches from 
https://patchwork.kernel.org/project/linux-clk/list/?series=221739=both=%2a
d) 
https://lore.kernel.org/linux-arm-msm/20200124144154.v2.10.I1a4b93fb005791e29a9dcf288fc8bd459a555a59%40changeid/raw
e) This patch "arm64: dts: qcom: sc7180: Add A618 gpu dt blob"

v3: Addressed review comments from previous submits. Also removed the
interconnect bindings from this patch and I will submit as a new patch with its
dependencies listed. Also I will be sending a new patch for updating the
bindings documentation.

v4: Add GX_GDSC power domain binding for GMU

v5: Change to a dummy GX_GDSC binding for faster landing

Sharat Masetty (1):
  arm64: dts: qcom: sc7180: Add A618 gpu dt blob

 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

--
1.9.1


[PATCH v4 3/3] arm64: dts: qcom: sc7180: Add A618 gpu dt blob

2020-02-04 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index f3fcc5c..63fff15 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1043,6 +1043,108 @@
};
};
 
+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x0500 0 0x4>, <0 0x0509e000 0 0x1000>,
+   <0 0x05061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+   interrupts = ;
+   iommus = <_smmu 0>;
+   operating-points-v2 = <_opp_table>;
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x0504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>,
+   < GCC_DDRSS_GPU_AXI_CLK>;
+
+   clock-names = "bus", "iface", "mem_iface_clk";
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618.0", "qcom,adreno-gmu";
+   reg = <0 0x0506a000 0 0x31000>, <0 0x0b29 0 
0x1>,
+   <0 0x0b49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DDRSS_GPU_AXI_CLK>,
+  < GCC_GPU_MEMNOC_GFX_CLK>;
+   clock-names = "gmu", "cxo", "axi", "memnoc";
+   power-domains = < CX_GDSC>, < GX_GDSC>;
+   power-domain-names = "cx

[PATCH v4 1/3] dt-bindings: clk: qcom: Add support for GPU GX GDSCR

2020-02-04 Thread Sharat Masetty
From: Taniya Das 

Add support for the GX GDSC for the cases where the GPU software
needs to make use of it.

Signed-off-by: Taniya Das 
---
 include/dt-bindings/clock/qcom,gpucc-sc7180.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/dt-bindings/clock/qcom,gpucc-sc7180.h 
b/include/dt-bindings/clock/qcom,gpucc-sc7180.h
index 0e4643b..65e706d 100644
--- a/include/dt-bindings/clock/qcom,gpucc-sc7180.h
+++ b/include/dt-bindings/clock/qcom,gpucc-sc7180.h
@@ -15,7 +15,8 @@
 #define GPU_CC_CXO_CLK 6
 #define GPU_CC_GMU_CLK_SRC 7
 
-/* CAM_CC GDSCRs */
+/* GPU_CC GDSCRs */
 #define CX_GDSC0
+#define GX_GDSC1
 
 #endif
-- 
1.9.1


[PATCH v4 0/3] sc7180: Add A618 GPU bindings

2020-02-04 Thread Sharat Masetty
I used this branch qcom/arm64-for-5.6-to-be-rebased as suggested by Matthias.
This patch depends on clock patches which have not yet landed, so
please apply the following series and patches in this order:
a) All patches from 
https://patchwork.kernel.org/project/linux-clk/list/?series=203517=%2a=both
b) Patches 1 and 2 from 
https://patchwork.kernel.org/project/linux-clk/list/?series=203527=both=%2a
c) All patches from 
https://patchwork.kernel.org/project/linux-clk/list/?series=221739=both=%2a
d) 
https://lore.kernel.org/linux-arm-msm/20200124144154.v2.10.I1a4b93fb005791e29a9dcf288fc8bd459a555a59%40changeid/raw
e) This patch "arm64: dts: qcom: sc7180: Add A618 gpu dt blob"

v3: Addressed review comments from previous submits. Also removed the
interconnect bindings from this patch and I will submit as a new patch with its
dependencies listed. Also I will be sending a new patch for updating the
bindings documentation.

v4: Add GX_GDSC power domain binding for GMU

Sharat Masetty (1):
  arm64: dts: qcom: sc7180: Add A618 gpu dt blob

Taniya Das (2):
  dt-bindings: clk: qcom: Add support for GPU GX GDSCR
  clk: qcom: gpucc: Add support for GX GDSC for SC7180

 arch/arm64/boot/dts/qcom/sc7180.dtsi  | 102 ++
 drivers/clk/qcom/gpucc-sc7180.c   |  37 ++
 include/dt-bindings/clock/qcom,gpucc-sc7180.h |   3 +-
 3 files changed, 141 insertions(+), 1 deletion(-)

--
1.9.1


[PATCH v4 2/3] clk: qcom: gpucc: Add support for GX GDSC for SC7180

2020-02-04 Thread Sharat Masetty
From: Taniya Das 

Most of the time the CPU should not be touching the GX domain on the
GPU except for a very special use case when the CPU needs to force the
GX headswitch off. Add a dummy enable function for the GX gdsc to
simulate success so that the pm_runtime reference counting is correct.

Signed-off-by: Taniya Das 
---
 drivers/clk/qcom/gpucc-sc7180.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/drivers/clk/qcom/gpucc-sc7180.c b/drivers/clk/qcom/gpucc-sc7180.c
index ec61194..3b29f19 100644
--- a/drivers/clk/qcom/gpucc-sc7180.c
+++ b/drivers/clk/qcom/gpucc-sc7180.c
@@ -172,8 +172,45 @@ enum {
.flags = VOTABLE,
 };
 
+/*
+ * On SC7180 the GPU GX domain is *almost* entirely controlled by the GMU
+ * running in the CX domain so the CPU doesn't need to know anything about the
+ * GX domain EXCEPT
+ *
+ * Hardware constraints dictate that the GX be powered down before the CX. If
+ * the GMU crashes it could leave the GX on. In order to successfully bring back
+ * the device the CPU needs to disable the GX headswitch. There being no sane
+ * way to reach in and touch that register from deep inside the GPU driver we
+ * need to set up the infrastructure to be able to ensure that the GPU can
+ * ensure that the GX is off during this super special case. We do this by
+ * defining a GX gdsc with a dummy enable function and a "default" disable
+ * function.
+ *
+ * This allows us to attach with genpd_dev_pm_attach_by_name() in the GPU
+ * driver. During power up, nothing will happen from the CPU (and the GMU will
+ * power up normally but during power down this will ensure that the GX domain
+ * is *really* off - this gives us a semi standard way of doing what we need.
+ */
+static int gx_gdsc_enable(struct generic_pm_domain *domain)
+{
+   /* Do nothing but give genpd the impression that we were successful */
+   return 0;
+}
+
+static struct gdsc gx_gdsc = {
+   .gdscr = 0x100c,
+   .clamp_io_ctrl = 0x1508,
+   .pd = {
+   .name = "gpu_gx_gdsc",
+   .power_on = gx_gdsc_enable,
+   },
+   .pwrsts = PWRSTS_OFF_ON,
+   .flags = CLAMP_IO,
+};
+
 static struct gdsc *gpu_cc_sc7180_gdscs[] = {
[CX_GDSC] = _gdsc,
+   [GX_GDSC] = _gdsc,
 };
 
 static struct clk_regmap *gpu_cc_sc7180_clocks[] = {
-- 
1.9.1
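
For context on the comment block above: the consumer side it alludes to is the
GPU/GMU driver attaching to this GX domain by name and dropping a runtime PM
reference on it during recovery so that the headswitch is really off. A rough
sketch of that usage follows; this is an assumption about the driver side (the
gmu_gx_ctx container and function names are illustrative), not part of this
clock patch:

#include <linux/err.h>
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>

/* Illustrative container; the real driver would keep this in its GMU struct */
struct gmu_gx_ctx {
	struct device *dev;	/* the GMU platform device */
	struct device *gxpd;	/* virtual device bound to the GX power domain */
};

/* Attach to the "gx" domain listed in power-domain-names */
static int gmu_gx_attach(struct gmu_gx_ctx *ctx)
{
	ctx->gxpd = dev_pm_domain_attach_by_name(ctx->dev, "gx");
	return PTR_ERR_OR_ZERO(ctx->gxpd);
}

/* Normal power-up path: take a reference so genpd keeps GX powered */
static void gmu_gx_on(struct gmu_gx_ctx *ctx)
{
	if (!IS_ERR_OR_NULL(ctx->gxpd))
		pm_runtime_get_sync(ctx->gxpd);
}

/* GMU crash/recovery: drop the reference so the GX headswitch really opens */
static void gmu_gx_force_off(struct gmu_gx_ctx *ctx)
{
	if (!IS_ERR_OR_NULL(ctx->gxpd))
		pm_runtime_put_sync(ctx->gxpd);
}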


[PATCH v3] Add A618 GPU nodes

2020-01-31 Thread Sharat Masetty
I used this branch qcom/arm64-for-5.6-to-be-rebased as suggested by Matthias.
This patch depends on clock patches which have not yet landed, so
please apply the following series and patches in this order:

a) All patches from 
https://patchwork.kernel.org/project/linux-clk/list/?series=203517=%2a=both
b) Patches 1 and 2 from 
https://patchwork.kernel.org/project/linux-clk/list/?series=203527=both=%2a
c) All patches from 
https://patchwork.kernel.org/project/linux-clk/list/?series=221739=both=%2a
d) 
https://lore.kernel.org/linux-arm-msm/20200124144154.v2.10.I1a4b93fb005791e29a9dcf288fc8bd459a555a59%40changeid/raw
e) This patch "arm64: dts: qcom: sc7180: Add A618 gpu dt blob"

v3: Addressed review comments from previous submits. Also removed the
interconnect bindings from this patch and I will submit as a new patch with its
dependencies listed. Also I will be sending a new patch for updating the
bindings documentation.

Sharat Masetty (1):
  arm64: dts: qcom: sc7180: Add A618 gpu dt blob

 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

--
1.9.1


[PATCH v3] arm64: dts: qcom: sc7180: Add A618 gpu dt blob

2020-01-31 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 102 +++
 1 file changed, 102 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index f3fcc5c..b39461c 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -1043,6 +1043,108 @@
};
};
 
+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x0500 0 0x4>, <0 0x0509e000 0 0x1000>,
+   <0 0x05061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+   interrupts = ;
+   iommus = <_smmu 0>;
+   operating-points-v2 = <_opp_table>;
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x0504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>,
+   < GCC_DDRSS_GPU_AXI_CLK>;
+
+   clock-names = "bus", "iface", "mem_iface_clk";
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618.0", "qcom,adreno-gmu";
+   reg = <0 0x0506a000 0 0x31000>, <0 0x0b29 0 
0x1>,
+   <0 0x0b49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DDRSS_GPU_AXI_CLK>,
+  < GCC_GPU_MEMNOC_GFX_CLK>;
+   clock-names = "gmu", "cxo", "axi", "memnoc";
+   power-domains = < CX_GDSC>;
+   power-domain-names = "cx

[PATCH v2] arm64: dts: qcom: sc7180: Add A618 gpu dt blob

2020-01-27 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 103 +++
 1 file changed, 103 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index b859431..277d84d 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -7,6 +7,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1619,6 +1620,108 @@
#interconnect-cells = <1>;
qcom,bcm-voters = <_bcm_voter>;
};
+
+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x0500 0 0x4>, <0 0x0509e000 0 0x1000>,
+   <0 0x05061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+   interrupts = ;
+   iommus = <_smmu 0>;
+   operating-points-v2 = <_opp_table>;
+   interconnects = <_noc MASTER_GFX3D _virt 
SLAVE_EBI1>;
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+   opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+   opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x0504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>,
+   < GCC_DDRSS_GPU_AXI_CLK>;
+
+   clock-names = "bus", "iface", "mem_iface_clk";
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618", "qcom,adreno-gmu";
+   reg = <0 0x0506a000 0 0x31000>, <0 0x0b29 0 
0x1>,
+   <0 0x0b49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DDRSS_GPU_AXI_CLK>,
+  < GCC_G

[PATCH v2 2/3] drm: msm: a6xx: Add support for A618

2020-01-22 Thread Sharat Masetty
This patch adds support for enabling the Graphics Bus Interface (GBIF)
used in multiple A6xx series chipsets. It also makes changes to the
PDC/RSC sequencing specifically required for A618. This is needed
for proper interfacing with RPMH.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx.xml.h   | 52 -
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c   | 24 --
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 80 +
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |  9 +++-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h | 12 -
 5 files changed, 160 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx.xml.h 
b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
index f44553e..ed78fee 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
@@ -16,11 +16,11 @@
 - /home/robclark/src/envytools/rnndb/adreno/a3xx.xml  (  83840 bytes, 
from 2018-07-03 19:37:13)
 - /home/robclark/src/envytools/rnndb/adreno/a4xx.xml  ( 112086 bytes, 
from 2018-07-03 19:37:13)
 - /home/robclark/src/envytools/rnndb/adreno/a5xx.xml  ( 147240 bytes, 
from 2018-12-02 17:29:54)
-- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml  ( 140790 bytes, 
from 2018-12-02 17:29:54)
+- /home/smasetty/playarea/envytools/rnndb/adreno/a6xx.xml ( 161969 bytes, 
from 2019-11-29 07:18:16)
 - /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml  (  10431 bytes, 
from 2018-09-14 13:03:07)
 - /home/robclark/src/envytools/rnndb/adreno/ocmem.xml (   1773 bytes, 
from 2018-07-03 19:37:13)
 
-Copyright (C) 2013-2018 by the following authors:
+Copyright (C) 2013-2019 by the following authors:
 - Rob Clark  (robclark)
 - Ilia Mirkin  (imirkin)
 
@@ -2519,6 +2519,54 @@ static inline uint32_t 
A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val)
 
 #define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2   0x311a
 
+#define REG_A6XX_GBIF_SCACHE_CNTL1 0x3c02
+
+#define REG_A6XX_GBIF_QSB_SIDE0
0x3c03
+
+#define REG_A6XX_GBIF_QSB_SIDE1
0x3c04
+
+#define REG_A6XX_GBIF_QSB_SIDE2
0x3c05
+
+#define REG_A6XX_GBIF_QSB_SIDE3
0x3c06
+
+#define REG_A6XX_GBIF_HALT 0x3c45
+
+#define REG_A6XX_GBIF_HALT_ACK 0x3c46
+
+#define REG_A6XX_GBIF_PERF_PWR_CNT_EN  0x3cc0
+
+#define REG_A6XX_GBIF_PERF_CNT_SEL 0x3cc2
+
+#define REG_A6XX_GBIF_PERF_PWR_CNT_SEL 0x3cc3
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW00x3cc4
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW10x3cc5
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW20x3cc6
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW30x3cc7
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH0   0x3cc8
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH1   0x3cc9
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH2   0x3cca
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH3   0x3ccb
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW0 0x3ccc
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW1 0x3ccd
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW2 0x3cce
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH00x3ccf
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH10x3cd0
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH20x3cd1
+
 #define REG_A6XX_RB_WINDOW_OFFSET2 0x88d4
 #define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE   0x8000
 #define A6XX_RB_WINDOW_OFFSET2_X__MASK 0x7fff
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 85f14fe..158a74c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
 
 #include 
 #include 
@@ -433,6 +433,8 @@ static void __iomem *a6xx_gmu_get_mmio(struct 
platform_device *pdev,
 
 static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 {
+   struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+   struct adreno_gpu *adreno_gpu = _gpu->base;
struct platform_device *pdev = to_platform_device(gmu->dev);
void __iomem *pdcptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc");
void __iomem *seqptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc_seq");
@@ -480

[PATCH v2 3/3] drm: msm: a6xx: Dump GBIF registers, debugbus in gpu state

2020-01-22 Thread Sharat Masetty
Add the relevant GBIF registers and the debug bus to the a6xx gpu
state. This comes in pretty handy when debugging GPU bus related
issues.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 52 +++--
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 16 +++--
 2 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 691c1a27..d6023ba 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
 
 #include 
 #include "msm_gem.h"
@@ -320,6 +320,7 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
 {
struct resource *res;
void __iomem *cxdbg = NULL;
+   int nr_debugbus_blocks;
 
/* Set up the GX debug bus */
 
@@ -374,9 +375,11 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
}
 
-   a6xx_state->debugbus = state_kcalloc(a6xx_state,
-   ARRAY_SIZE(a6xx_debugbus_blocks),
-   sizeof(*a6xx_state->debugbus));
+   nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
+   (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
+
+   a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
+   sizeof(*a6xx_state->debugbus));
 
if (a6xx_state->debugbus) {
int i;
@@ -388,15 +391,31 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
_state->debugbus[i]);
 
a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
+
+   /*
+* GBIF has the same debugbus as the other GPU blocks, so fall back to
+* the default path if the GPU uses GBIF; note that GBIF uses exactly the
+* same debugbus ID as VBIF.
+*/
+   if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
+   a6xx_get_debugbus_block(gpu, a6xx_state,
+   _gbif_debugbus_block,
+   _state->debugbus[i]);
+
+   a6xx_state->nr_debugbus += 1;
+   }
}
 
-   a6xx_state->vbif_debugbus =
-   state_kcalloc(a6xx_state, 1,
-   sizeof(*a6xx_state->vbif_debugbus));
+   /*  Dump the VBIF debugbus on applicable targets */
+   if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
+   a6xx_state->vbif_debugbus =
+   state_kcalloc(a6xx_state, 1,
+   sizeof(*a6xx_state->vbif_debugbus));
 
-   if (a6xx_state->vbif_debugbus)
-   a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
-   a6xx_state->vbif_debugbus);
+   if (a6xx_state->vbif_debugbus)
+   a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
+   a6xx_state->vbif_debugbus);
+   }
 
if (cxdbg) {
a6xx_state->cx_debugbus =
@@ -770,14 +789,16 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
_state->gmu_registers[1]);
 }
 
+#define A6XX_GBIF_REGLIST_SIZE   1
 static void a6xx_get_registers(struct msm_gpu *gpu,
struct a6xx_gpu_state *a6xx_state,
struct a6xx_crashdumper *dumper)
 {
int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
ARRAY_SIZE(a6xx_reglist) +
-   ARRAY_SIZE(a6xx_hlsq_reglist);
+   ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
int index = 0;
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 
a6xx_state->registers = state_kcalloc(a6xx_state,
count, sizeof(*a6xx_state->registers));
@@ -792,6 +813,15 @@ static void a6xx_get_registers(struct msm_gpu *gpu,
a6xx_state, _ahb_reglist[i],
_state->registers[index++]);
 
+   if (a6xx_has_gbif(adreno_gpu))
+   a6xx_get_ahb_gpu_registers(gpu,
+   a6xx_state, _gbif_reglist,
+   _state->registers[index++]);
+   else
+   a6xx_get_ahb_gpu_registers(gpu,
+   a6xx_state, _vbif_reglist,
+   _state->registers[index++]);
+
for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
a6xx_get_crashdumper_registers(gpu,
a6xx_state, _reglist[i],
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
index 68cccfa..e67c20c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_g

[PATCH v2 1/3] drm: msm: Add 618 gpu to the adreno gpu list

2020-01-22 Thread Sharat Masetty
This patch adds the Adreno 618 entry and its associated properties
to the gpulist entries.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_device.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c
index fbbdf86..cb3a6e5 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -167,6 +167,17 @@
.init = a5xx_gpu_init,
.zapfw = "a540_zap.mdt",
}, {
+   .rev = ADRENO_REV(6, 1, 8, ANY_ID),
+   .revn = 618,
+   .name = "A618",
+   .fw = {
+   [ADRENO_FW_SQE] = "a630_sqe.fw",
+   [ADRENO_FW_GMU] = "a630_gmu.bin",
+   },
+   .gmem = SZ_512K,
+   .inactive_period = DRM_MSM_INACTIVE_PERIOD,
+   .init = a6xx_gpu_init,
+   }, {
.rev = ADRENO_REV(6, 3, 0, ANY_ID),
.revn = 630,
.name = "A630",
-- 
1.9.1


[PATCH v2 3/7] drm/msm: Attach the IOMMU device during initialization

2020-01-02 Thread Sharat Masetty
From: Jordan Crouse 

Everywhere an IOMMU object is created by msm_gpu_create_address_space,
the IOMMU device is attached immediately after. Instead of carrying around
the infrastructure to do the attach from the device-specific code, do it
directly in the msm_iommu_init() function. This gets it out of the way for
more aggressive cleanups that follow.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c  |  9 -
 drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c |  5 -
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c |  8 
 drivers/gpu/drm/msm/msm_gem_vma.c| 23 +++
 drivers/gpu/drm/msm/msm_gpu.c| 11 +--
 drivers/gpu/drm/msm/msm_gpummu.c |  7 ---
 drivers/gpu/drm/msm/msm_iommu.c  | 16 +++-
 drivers/gpu/drm/msm/msm_mmu.h|  1 -
 8 files changed, 27 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index ee93ba3..fdd9e2e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -736,7 +736,6 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms)
 {
struct iommu_domain *domain;
struct msm_gem_address_space *aspace;
-   int ret;
 
domain = iommu_domain_alloc(_bus_type);
if (!domain)
@@ -752,14 +751,6 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms)
return PTR_ERR(aspace);
}
 
-   ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports,
-   ARRAY_SIZE(iommu_ports));
-   if (ret) {
-   DPU_ERROR("failed to attach iommu %d\n", ret);
-   msm_gem_address_space_put(aspace);
-   return ret;
-   }
-
dpu_kms->base.aspace = aspace;
return 0;
 }
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c 
b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
index 50711cc..c1c48df 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
@@ -523,11 +523,6 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
}
 
kms->aspace = aspace;
-
-   ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports,
-   ARRAY_SIZE(iommu_ports));
-   if (ret)
-   goto fail;
} else {
DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys "
"contig buffers for scanout\n");
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c 
b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 91cd76a..2d66ae2 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -736,14 +736,6 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
}
 
kms->aspace = aspace;
-
-   ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports,
-   ARRAY_SIZE(iommu_ports));
-   if (ret) {
-   DRM_DEV_ERROR(>dev, "failed to attach iommu: 
%d\n",
-   ret);
-   goto fail;
-   }
} else {
DRM_DEV_INFO(>dev,
 "no iommu, fallback to phys contig buffers for 
scanout\n");
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c 
b/drivers/gpu/drm/msm/msm_gem_vma.c
index 1af5354..91d993a 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -131,8 +131,8 @@ struct msm_gem_address_space *
const char *name)
 {
struct msm_gem_address_space *aspace;
-   u64 size = domain->geometry.aperture_end -
-   domain->geometry.aperture_start;
+   u64 start = domain->geometry.aperture_start;
+   u64 size = domain->geometry.aperture_end - start;
 
aspace = kzalloc(sizeof(*aspace), GFP_KERNEL);
if (!aspace)
@@ -141,9 +141,18 @@ struct msm_gem_address_space *
spin_lock_init(>lock);
aspace->name = name;
aspace->mmu = msm_iommu_new(dev, domain);
+   if (IS_ERR(aspace->mmu)) {
+   int ret = PTR_ERR(aspace->mmu);
 
-   drm_mm_init(>mm, (domain->geometry.aperture_start >> 
PAGE_SHIFT),
-   size >> PAGE_SHIFT);
+   kfree(aspace);
+   return ERR_PTR(ret);
+   }
+
+   /*
+* Attaching the IOMMU device changes the aperture values so use the
+* cached values instead
+*/
+   drm_mm_init(>mm, start >> PAGE_SHIFT, size >> PAGE_SHIFT);
 
kref_init(>kref);
 
@@ -164,6 +173,12 @@ struct msm_gem_address_space *
spin_lock_init(>lock);
aspace->name = name;
aspace->mmu = msm_gpummu_new(dev, gpu);
+   if (IS_ERR(aspace->mmu)) {
+   int ret = PTR_ERR(aspace->mmu);
+
+   kfree(aspace);
+   return ERR_PTR(ret);
+   }
 

[PATCH v2 6/7] drm/msm: rearrange the gpu_rmw() function

2020-01-02 Thread Sharat Masetty
The register read-modify-write construct is generic enough
that it can be used by other subsystems as needed. Create
a more generic msm_rmw() function and have gpu_rmw() use
this new function.

Signed-off-by: Sharat Masetty 
Reviewed-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/msm_drv.c | 8 
 drivers/gpu/drm/msm/msm_drv.h | 1 +
 drivers/gpu/drm/msm/msm_gpu.h | 5 +
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index f50fefb..4c4559f 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -165,6 +165,14 @@ u32 msm_readl(const void __iomem *addr)
return val;
 }
 
+void msm_rmw(void __iomem *addr, u32 mask, u32 or)
+{
+   u32 val = msm_readl(addr);
+
+   val &= ~mask;
+   msm_writel(val | or, addr);
+}
+
 struct msm_vblank_work {
struct work_struct work;
int crtc_id;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 2203729..1bc778b 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -405,6 +405,7 @@ void __iomem *msm_ioremap(struct platform_device *pdev, 
const char *name,
const char *dbgname);
 void msm_writel(u32 data, void __iomem *addr);
 u32 msm_readl(const void __iomem *addr);
+void msm_rmw(void __iomem *addr, u32 mask, u32 or);
 
 struct msm_gpu_submitqueue;
 int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 41d86c2..935a12f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -223,10 +223,7 @@ static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
 
 static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
 {
-   uint32_t val = gpu_read(gpu, reg);
-
-   val &= ~mask;
-   gpu_write(gpu, reg, val | or);
+   msm_rmw(gpu->mmio + (reg << 2), mask, or);
 }
 
 static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
-- 
1.9.1
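
As a usage note (the register and mask below are made up purely for
illustration): with the helper in place, a masked register update in the GPU
code reduces to a single gpu_rmw() call, for example:

/* Hypothetical register and field, for illustration only */
#define REG_EXAMPLE_CNTL		0x0042
#define EXAMPLE_CNTL_THRESH_MASK	0xff

static void example_set_threshold(struct msm_gpu *gpu, u32 thresh)
{
	/* Clear bits [7:0] and OR in the new threshold; other bits untouched */
	gpu_rmw(gpu, REG_EXAMPLE_CNTL, EXAMPLE_CNTL_THRESH_MASK,
		thresh & EXAMPLE_CNTL_THRESH_MASK);
}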


[PATCH v2 5/7] drm: msm: a6xx: Properly free up the iommu objects

2020-01-02 Thread Sharat Masetty
This patch calls the right function to destroy the iommu domain as well
as free the associated iommu structure, thereby facilitating proper
cleanup of resources when creating an address space fails.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 0d72e6c..ab562f6 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -896,7 +896,7 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)

aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M, 0x);
if (IS_ERR(aspace))
-   iommu_domain_free(iommu);
+   mmu->funcs->destroy(mmu);

return aspace;
 }
--
1.9.1


[PATCH v2 4/7] drm/msm: Refactor address space initialization

2020-01-02 Thread Sharat Masetty
From: Jordan Crouse 

Refactor how address space initialization works. Instead of having the
address space function create the MMU object (and thus require separate but
equal functions for gpummu and iommu) use a single function and pass the
MMU struct. Make the generic code cleaner by using target specific
functions to create the address space so a2xx can do its own thing in its
own space.  For all the other targets use a generic helper to initialize
IOMMU but leave the door open for newer targets to use customization
if they need it.

This patch also adds support for a6xx specific mmu initialization to
allow for future extensions like supporting split pagetables.

Signed-off-by: Jordan Crouse 
Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a2xx_gpu.c| 16 ++
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c|  1 +
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c|  1 +
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c|  1 +
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c| 24 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c  | 23 ++
 drivers/gpu/drm/msm/adreno/adreno_gpu.h  |  8 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c  | 10 +++---
 drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 14 +
 drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c |  4 ---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 11 +--
 drivers/gpu/drm/msm/msm_drv.h|  8 ++---
 drivers/gpu/drm/msm/msm_gem_vma.c| 52 +---
 drivers/gpu/drm/msm/msm_gpu.c| 40 ++--
 drivers/gpu/drm/msm/msm_gpu.h|  4 +--
 drivers/gpu/drm/msm/msm_iommu.c  |  3 ++
 16 files changed, 106 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
index 1f83bc1..60f6472 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
@@ -401,6 +401,21 @@ static struct msm_gpu_state *a2xx_gpu_state_get(struct 
msm_gpu *gpu)
return state;
 }

+static struct msm_gem_address_space *
+a2xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
+{
+   struct msm_mmu *mmu = msm_gpummu_new(>dev, gpu);
+   struct msm_gem_address_space *aspace;
+
+   aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M,
+   SZ_16M + 0xfff * SZ_64K);
+
+   if (IS_ERR(aspace) && !IS_ERR(mmu))
+   mmu->funcs->destroy(mmu);
+
+   return aspace;
+}
+
 /* Register offset defines for A2XX - copy of A3XX */
 static const unsigned int a2xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
@@ -429,6 +444,7 @@ static struct msm_gpu_state *a2xx_gpu_state_get(struct 
msm_gpu *gpu)
 #endif
.gpu_state_get = a2xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+   .create_address_space = a2xx_create_address_space,
},
 };

diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 5f7e980..825e6f6 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -448,6 +448,7 @@ static struct msm_gpu_state *a3xx_gpu_state_get(struct 
msm_gpu *gpu)
 #endif
.gpu_state_get = a3xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+   .create_address_space = adreno_iommu_create_address_space,
},
 };

diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index ab2b752..0484e48 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -538,6 +538,7 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t 
*value)
 #endif
.gpu_state_get = a4xx_gpu_state_get,
.gpu_state_put = adreno_gpu_state_put,
+   .create_address_space = adreno_iommu_create_address_space,
},
.get_timestamp = a4xx_get_timestamp,
 };
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 99cd6e6..b5cc407 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1394,6 +1394,7 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
.gpu_busy = a5xx_gpu_busy,
.gpu_state_get = a5xx_gpu_state_get,
.gpu_state_put = a5xx_gpu_state_put,
+   .create_address_space = adreno_iommu_create_address_space,
},
.get_timestamp = a5xx_get_timestamp,
 };
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index daf0780..0d72e6c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -878,6 +878,29 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
return (unsigned long)busy_time;
 }

+static s

[PATCH v2 7/7] drm/msm/a6xx: Add support for using system cache(LLC)

2020-01-02 Thread Sharat Masetty
The last level system cache can be partitioned into 32 different slices,
of which the GPU has two slices preallocated. One slice is used for caching GPU
buffers and the other slice is used for caching the GPU SMMU pagetables.
This patch talks to the core system cache driver to acquire the slice handles,
configure the SCIDs for those slices, and activate and deactivate the slices
upon GPU power collapse and restore.

Some support from the IOMMU driver is also needed to make use of the
system cache. IOMMU_QCOM_SYS_CACHE is a buffer protection flag which enables
caching GPU data buffers in the system cache with memory attributes such
as outer cacheable, read-allocate, write-allocate for buffers. The GPU
can then override a few cacheability parameters; it changes write-allocate
to write-no-allocate since the GPU hardware does not benefit much from it.

Similarly, DOMAIN_ATTR_QCOM_SYS_CACHE is a domain level attribute used by
the IOMMU driver to set the right attributes so that the hardware
pagetables are also cached in the system cache.
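
A minimal sketch of how a client driver could request this, assuming the
attribute support from the earlier patches is in place (the helper name
example_enable_sys_cache() is made up for illustration, it is not part of
this series). The attribute has to be set before the domain is attached,
since the SMMU driver rejects it once the domain is live:

    #include <linux/iommu.h>

    /* Hypothetical helper: opt the domain into system-cache usage,
     * then attach it. Must run before the first attach. */
    static int example_enable_sys_cache(struct iommu_domain *domain,
                                        struct device *dev)
    {
            int val = 1;
            int ret;

            ret = iommu_domain_set_attr(domain,
                                        DOMAIN_ATTR_QCOM_SYS_CACHE, &val);
            if (ret)
                    return ret;

            return iommu_attach_device(domain, dev);
    }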

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 100 ++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   3 +
 drivers/gpu/drm/msm/msm_iommu.c   |   3 +
 drivers/gpu/drm/msm/msm_mmu.h |   4 ++
 4 files changed, 110 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index ab562f6..d15eb99f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -9,6 +9,8 @@
 #include "a6xx_gmu.xml.h"

 #include 
+#include 
+#include 

 #define GPU_PAS_ID 13

@@ -781,6 +783,81 @@ static void a6xx_bus_clear_pending_transactions(struct 
adreno_gpu *adreno_gpu)
gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
 }

+static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
+{
+   return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
+{
+   return msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
+}
+
+static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
+{
+   llcc_slice_deactivate(a6xx_gpu->llc_slice);
+   llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
+{
+   u32 cntl1_regval = 0;
+
+   if (IS_ERR(a6xx_gpu->llc_mmio))
+   return;
+
+   if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
+   u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
+
+   gpu_scid &= 0x1f;
+   cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) |
+   (gpu_scid << 10) | (gpu_scid << 15) | (gpu_scid << 20);
+   }
+
+   if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
+   u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
+
+   gpuhtw_scid &= 0x1f;
+   cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
+   }
+
+   if (cntl1_regval) {
+   /*
+* Program the slice IDs for the various GPU blocks and GPU MMU
+* pagetables
+*/
+   a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   cntl1_regval);
+
+   /*
+* Program cacheability overrides to not allocate cache lines on
+* a write miss
+*/
+   a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0,
+   0xF, 0x03);
+   }
+}
+
+static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
+{
+   llcc_slice_putd(a6xx_gpu->llc_slice);
+   llcc_slice_putd(a6xx_gpu->htw_llc_slice);
+}
+
+static void a6xx_llc_slices_init(struct platform_device *pdev,
+   struct a6xx_gpu *a6xx_gpu)
+{
+   a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+   if (IS_ERR(a6xx_gpu->llc_mmio))
+   return;
+
+   a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
+   a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
+
+   if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
+   a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
+}
+
 static int a6xx_pm_resume(struct msm_gpu *gpu)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -795,6 +872,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)

msm_gpu_resume_devfreq(gpu);

+   a6xx_llc_activate(a6xx_gpu);
+
return 0;
 }

@@ -803,6 +882,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

+   a6xx_llc_deactivate(a6xx_gpu);
+
devfreq_suspend_device(gp

[PATCH v2 1/7] iommu/arm-smmu: Pass io_pgtable_cfg to impl specific init_context

2020-01-02 Thread Sharat Masetty
From: Jordan Crouse 

Pass the proposed io_pgtable_cfg to the implementation specific
init_context() function to give the implementation an opportunity to
modify it before it gets passed to io-pgtable.
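
As a hedged illustration of the new hook (the function name and the
specific tweak are made up here; the real users follow in later patches),
an implementation can now adjust the proposed configuration before
io-pgtable consumes it:

    static int example_init_context(struct arm_smmu_domain *smmu_domain,
                                    struct io_pgtable_cfg *pgtbl_cfg)
    {
            /* e.g. force a non-coherent pagetable walk for this domain */
            pgtbl_cfg->coherent_walk = false;

            return 0;
    }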

Signed-off-by: Jordan Crouse 
Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm-smmu-impl.c |  3 ++-
 drivers/iommu/arm-smmu.c  | 11 ++-
 drivers/iommu/arm-smmu.h  |  3 ++-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index b2fe72a..33ed682 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -68,7 +68,8 @@ static int cavium_cfg_probe(struct arm_smmu_device *smmu)
return 0;
 }

-static int cavium_init_context(struct arm_smmu_domain *smmu_domain)
+static int cavium_init_context(struct arm_smmu_domain *smmu_domain,
+   struct io_pgtable_cfg *pgtbl_cfg)
 {
struct cavium_smmu *cs = container_of(smmu_domain->smmu,
  struct cavium_smmu, smmu);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index eee48f9..4f7e0c0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -758,11 +758,6 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
cfg->asid = cfg->cbndx;

smmu_domain->smmu = smmu;
-   if (smmu->impl && smmu->impl->init_context) {
-   ret = smmu->impl->init_context(smmu_domain);
-   if (ret)
-   goto out_unlock;
-   }

smmu_domain->pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap  = smmu->pgsize_bitmap,
@@ -773,6 +768,12 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
.iommu_dev  = smmu->dev,
};

+   if (smmu->impl && smmu->impl->init_context) {
+   ret = smmu->impl->init_context(smmu_domain, 
_domain->pgtbl_cfg);
+   if (ret)
+   goto out_unlock;
+   }
+
if (smmu_domain->non_strict)
smmu_domain->pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index b2df38c..f57cdbe 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -335,7 +335,8 @@ struct arm_smmu_impl {
u64 val);
int (*cfg_probe)(struct arm_smmu_device *smmu);
int (*reset)(struct arm_smmu_device *smmu);
-   int (*init_context)(struct arm_smmu_domain *smmu_domain);
+   int (*init_context)(struct arm_smmu_domain *smmu_domain,
+   struct io_pgtable_cfg *pgtbl_cfg);
 };

 static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
--
1.9.1
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2 2/7] iommu/arm-smmu: Add domain attribute for QCOM system cache

2020-01-02 Thread Sharat Masetty
From: Vivek Gautam 

Add an iommu domain attribute for using the system cache, aka last level
cache, on QCOM SoCs. Client drivers like the GPU can set this attribute to
get the right attributes for caching the hardware pagetables into the
system cache.
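
For illustration only (the helper name is hypothetical), a client could
later query whether the attribute stuck on its domain:

    static bool example_domain_uses_sys_cache(struct iommu_domain *domain)
    {
            int val = 0;

            if (iommu_domain_get_attr(domain, DOMAIN_ATTR_QCOM_SYS_CACHE,
                                      &val))
                    return false;

            return val != 0;
    }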

Signed-off-by: Vivek Gautam 
Co-developed-by: Sai Prakash Ranjan 
Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm-smmu-qcom.c | 10 ++
 drivers/iommu/arm-smmu.c  | 14 ++
 drivers/iommu/arm-smmu.h  |  1 +
 include/linux/iommu.h |  1 +
 4 files changed, 26 insertions(+)

diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
index 24c071c..d1d22df 100644
--- a/drivers/iommu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm-smmu-qcom.c
@@ -30,7 +30,17 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device 
*smmu)
return ret;
 }
 
+static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+ struct io_pgtable_cfg *pgtbl_cfg)
+{
+   if (smmu_domain->sys_cache)
+   pgtbl_cfg->coherent_walk = false;
+
+   return 0;
+}
+
 static const struct arm_smmu_impl qcom_smmu_impl = {
+   .init_context = qcom_smmu_init_context,
.reset = qcom_sdm845_smmu500_reset,
 };
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 4f7e0c0..055b548 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1466,6 +1466,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
case DOMAIN_ATTR_NESTING:
*(int *)data = (smmu_domain->stage == 
ARM_SMMU_DOMAIN_NESTED);
return 0;
+   case DOMAIN_ATTR_QCOM_SYS_CACHE:
+   *((int *)data) = smmu_domain->sys_cache;
+   return 0;
default:
return -ENODEV;
}
@@ -1506,6 +1509,17 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
*domain,
else
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
break;
+   case DOMAIN_ATTR_QCOM_SYS_CACHE:
+   if (smmu_domain->smmu) {
+   ret = -EPERM;
+   goto out_unlock;
+   }
+
+   if (*((int *)data))
+   smmu_domain->sys_cache = true;
+   else
+   smmu_domain->sys_cache = false;
+   break;
default:
ret = -ENODEV;
}
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index f57cdbe..8aeaaf0 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -322,6 +322,7 @@ struct arm_smmu_domain {
struct mutexinit_mutex; /* Protects smmu pointer */
spinlock_t  cb_lock; /* Serialises ATS1* ops and 
TLB syncs */
struct iommu_domain domain;
+   boolsys_cache;
 };
 
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0c60e75..bd61c60 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -127,6 +127,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING,/* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+   DOMAIN_ATTR_QCOM_SYS_CACHE,
DOMAIN_ATTR_MAX,
 };
 
-- 
1.9.1
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2 0/7] drm/msm/a6xx: System Cache Support

2020-01-02 Thread Sharat Masetty
Some hardware variants contain a system level cache, or last level
cache (LLC). This cache is typically a large block which is shared by multiple
clients on the SoC. The GPU uses the system cache to cache both the GPU data
buffers (like textures) as well as the SMMU pagetables. This helps with
improved render performance as well as lower power consumption by reducing
the bus traffic to the system memory.

The system cache architecture allows the cache to be split into slices which
can then be used by multiple SoC clients. This patch series is an effort to enable
and use two of those slices preallocated for the GPU, one for the GPU data
buffers and another for the GPU SMMU hardware pagetables.

v2: Code reviews and rebased code on top of Jordan's split pagetables series

To enable the system cache driver, add [1] to your stack if not already
present. Please review.

[1] https://lore.kernel.org/patchwork/patch/1165298/

Jordan Crouse (3):
  iommu/arm-smmu: Pass io_pgtable_cfg to impl specific init_context
  drm/msm: Attach the IOMMU device during initialization
  drm/msm: Refactor address space initialization

Sharat Masetty (3):
  drm: msm: a6xx: Properly free up the iommu objects
  drm/msm: rearrange the gpu_rmw() function
  drm/msm/a6xx: Add support for using system cache(LLC)

Vivek Gautam (1):
  iommu/arm-smmu: Add domain attribute for QCOM system cache

 drivers/gpu/drm/msm/adreno/a2xx_gpu.c|  16 
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c|   1 +
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c|   1 +
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c|   1 +
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c| 124 +++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h|   3 +
 drivers/gpu/drm/msm/adreno/adreno_gpu.c  |  23 --
 drivers/gpu/drm/msm/adreno/adreno_gpu.h  |   8 ++
 drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c  |  19 ++---
 drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c |  19 ++---
 drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c |   4 -
 drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c |  19 +++--
 drivers/gpu/drm/msm/msm_drv.c|   8 ++
 drivers/gpu/drm/msm/msm_drv.h|   9 +--
 drivers/gpu/drm/msm/msm_gem_vma.c|  37 ++---
 drivers/gpu/drm/msm/msm_gpu.c|  49 +---
 drivers/gpu/drm/msm/msm_gpu.h|   9 +--
 drivers/gpu/drm/msm/msm_gpummu.c |   7 --
 drivers/gpu/drm/msm/msm_iommu.c  |  22 +++---
 drivers/gpu/drm/msm/msm_mmu.h|   5 +-
 drivers/iommu/arm-smmu-impl.c|   3 +-
 drivers/iommu/arm-smmu-qcom.c|  10 +++
 drivers/iommu/arm-smmu.c |  25 +--
 drivers/iommu/arm-smmu.h |   4 +-
 include/linux/iommu.h|   1 +
 25 files changed, 269 insertions(+), 158 deletions(-)

--
1.9.1
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 4/5] drm/msm: Pass mmu features to generic layers

2019-12-19 Thread Sharat Masetty
Allow different Adreno targets the ability to pass
specific MMU features to the generic layers. This will
help conditionally configure certain IOMMU features for
certain Adreno targets.

Also add a few simple support functions to handle a bitmask of
features that a specific MMU implementation supports.
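
A rough sketch of the kind of bitmask helpers this refers to (the names,
the feature bit, and the assumed 'features' field in struct msm_mmu are
illustrative, not necessarily what the diff below uses):

    /* needs <linux/bits.h> for BIT() */
    enum {
            MMU_FEATURE_USE_SYSTEM_CACHE = BIT(0),
    };

    static inline void msm_mmu_set_feature(struct msm_mmu *mmu, u32 feature)
    {
            mmu->features |= feature;
    }

    static inline bool msm_mmu_has_feature(struct msm_mmu *mmu, u32 feature)
    {
            return !!(mmu->features & feature);
    }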

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a2xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   |  2 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |  4 +++-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  2 +-
 drivers/gpu/drm/msm/msm_gpu.c   |  6 --
 drivers/gpu/drm/msm/msm_gpu.h   |  1 +
 drivers/gpu/drm/msm/msm_mmu.h   | 11 +++
 10 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
index 1f83bc1..bbac43c 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c
@@ -472,7 +472,7 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
 
adreno_gpu->reg_offsets = a2xx_register_offsets;
 
-   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 1);
+   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 1, 0);
if (ret)
goto fail;
 
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 5f7e980..63448fb 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -488,7 +488,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a3xx_registers;
adreno_gpu->reg_offsets = a3xx_register_offsets;
 
-   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 1);
+   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 1, 0);
if (ret)
goto fail;
 
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index ab2b752..90ae26d 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -572,7 +572,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a4xx_registers;
adreno_gpu->reg_offsets = a4xx_register_offsets;
 
-   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 1);
+   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 1, 0);
if (ret)
goto fail;
 
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 99cd6e6..a51ed2e 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1445,7 +1445,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
 
check_speed_bin(>dev);
 
-   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 4);
+   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 4, 0);
if (ret) {
a5xx_destroy(&(a5xx_gpu->base.base));
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index daf0780..faff6ff 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -924,7 +924,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = NULL;
adreno_gpu->reg_offsets = a6xx_register_offsets;
 
-   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 1);
+   ret = adreno_gpu_init(dev, pdev, adreno_gpu, , 1, 0);
if (ret) {
a6xx_destroy(&(a6xx_gpu->base.base));
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 048c8be..7dade16 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -895,7 +895,8 @@ static int adreno_get_pwrlevels(struct device *dev,
 
 int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct adreno_gpu *adreno_gpu,
-   const struct adreno_gpu_funcs *funcs, int nr_rings)
+   const struct adreno_gpu_funcs *funcs, int nr_rings,
+   u32 mmu_features)
 {
struct adreno_platform_config *config = pdev->dev.platform_data;
struct msm_gpu_config adreno_gpu_config  = { 0 };
@@ -916,6 +917,7 @@ int adreno_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,
adreno_gpu_config.va_end = SZ_16M + 0xfff * SZ_64K;
 
adreno_gpu_config.nr_rings = nr_rings;
+   adreno_gpu_config.mmu_features = mmu_features;
 
adreno_get_pwrlevels(>dev, gpu);
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index e12d5a9..27716f6 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -248,7 +2

[PATCH 5/5] drm/msm/a6xx: Add support for using system cache(LLC)

2019-12-19 Thread Sharat Masetty
The last level system cache can be partitioned into 32 different slices,
of which the GPU has two slices preallocated. One slice is used for caching GPU
buffers and the other slice is used for caching the GPU SMMU pagetables.
This patch talks to the core system cache driver to acquire the slice handles,
configure the SCIDs for those slices, and activate and deactivate the slices
upon GPU power collapse and restore.

Some support from the IOMMU driver is also needed to make use of the
system cache. IOMMU_QCOM_SYS_CACHE is a buffer protection flag which enables
caching GPU data buffers in the system cache with memory attributes such
as outer cacheable, read-allocate, write-allocate for buffers. The GPU
can then override a few cacheability parameters; it changes write-allocate
to write-no-allocate since the GPU hardware does not benefit much from it.

Similarly, DOMAIN_ATTR_QCOM_SYS_CACHE is a domain level attribute used by
the IOMMU driver to set the right attributes so that the hardware
pagetables are also cached in the system cache.
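
To make the buffer-flag side concrete, a hedged sketch (the helper name is
made up) of mapping a buffer so that the LLC may allocate lines for it:

    #include <linux/iommu.h>

    static int example_map_llc_cached(struct iommu_domain *domain,
                                      unsigned long iova, phys_addr_t paddr,
                                      size_t size)
    {
            int prot = IOMMU_READ | IOMMU_WRITE | IOMMU_QCOM_SYS_CACHE;

            return iommu_map(domain, iova, paddr, size, prot);
    }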

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 122 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   9 +++
 drivers/gpu/drm/msm/msm_iommu.c   |  13 
 drivers/gpu/drm/msm/msm_mmu.h |   3 +
 4 files changed, 146 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index faff6ff..0c7fdee 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -9,6 +9,7 @@
 #include "a6xx_gmu.xml.h"

 #include 
+#include 

 #define GPU_PAS_ID 13

@@ -781,6 +782,117 @@ static void a6xx_bus_clear_pending_transactions(struct 
adreno_gpu *adreno_gpu)
gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
 }

+#define A6XX_LLC_NUM_GPU_SCIDS 5
+#define A6XX_GPU_LLC_SCID_NUM_BITS 5
+
+#define A6XX_GPU_LLC_SCID_MASK \
+   ((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
+
+#define A6XX_GPUHTW_LLC_SCID_SHIFT 25
+#define A6XX_GPUHTW_LLC_SCID_MASK \
+   (((1 << A6XX_GPU_LLC_SCID_NUM_BITS) - 1) << A6XX_GPUHTW_LLC_SCID_SHIFT)
+
+static inline void a6xx_gpu_cx_rmw(struct a6xx_llc *llc,
+   u32 reg, u32 mask, u32 or)
+{
+   msm_rmw(llc->mmio + (reg << 2), mask, or);
+}
+
+static void a6xx_llc_deactivate(struct a6xx_llc *llc)
+{
+   llcc_slice_deactivate(llc->gpu_llc_slice);
+   llcc_slice_deactivate(llc->gpuhtw_llc_slice);
+}
+
+static void a6xx_llc_activate(struct a6xx_llc *llc)
+{
+   if (!llc->mmio)
+   return;
+
+   /* Program the sub-cache ID for all GPU blocks */
+   if (!llcc_slice_activate(llc->gpu_llc_slice))
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPU_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+A6XX_GPU_LLC_SCID_MASK));
+
+   /* Program the sub-cache ID for the GPU pagetables */
+   if (!llcc_slice_activate(llc->gpuhtw_llc_slice))
+   a6xx_gpu_cx_rmw(llc,
+   REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1,
+   A6XX_GPUHTW_LLC_SCID_MASK,
+   (llc->cntl1_regval &
+A6XX_GPUHTW_LLC_SCID_MASK));
+
+   /* Program cacheability overrides */
+   a6xx_gpu_cx_rmw(llc, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF,
+   llc->cntl0_regval);
+}
+
+static void a6xx_llc_slices_destroy(struct a6xx_llc *llc)
+{
+   if (llc->mmio)
+   iounmap(llc->mmio);
+
+   llcc_slice_putd(llc->gpu_llc_slice);
+   llcc_slice_putd(llc->gpuhtw_llc_slice);
+}
+
+static int a6xx_llc_slices_init(struct platform_device *pdev,
+   struct a6xx_llc *llc)
+{
+   llc->mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+   if (IS_ERR_OR_NULL(llc->mmio))
+   return -ENODEV;
+
+   llc->gpu_llc_slice = llcc_slice_getd(LLCC_GPU);
+   llc->gpuhtw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
+   if (IS_ERR(llc->gpu_llc_slice) && IS_ERR(llc->gpuhtw_llc_slice))
+   return -ENODEV;
+
+   /*
+* CNTL0 provides options to override the settings for the
+* read and write allocation policies for the LLC. These
+* overrides are global for all memory transactions from
+* the GPU.
+*
+* 0x3: read-no-alloc-overridden = 0
+*  read-no-alloc = 0 - Allocate lines on read miss
+*  write-no-alloc-overridden = 1
+*  write-no-alloc = 1 - Do not allocates lines on write miss
+*/
+   llc->cntl0_regval = 0x03;
+
+   /*
+* CNTL1 is used to specify SCID for (CP, TP, VFD, CCU and UBWC
+* F

[PATCH 1/5] iommu/arm-smmu: Pass io_pgtable_cfg to impl specific init_context

2019-12-19 Thread Sharat Masetty
From: Jordan Crouse 

Pass the proposed io_pgtable_cfg to the implementation specific
init_context() function to give the implementation an opportunity to
modify it before it gets passed to io-pgtable.

Signed-off-by: Jordan Crouse 
Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm-smmu-impl.c |  3 ++-
 drivers/iommu/arm-smmu.c  | 11 ++-
 drivers/iommu/arm-smmu.h  |  3 ++-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index b2fe72a..33ed682 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -68,7 +68,8 @@ static int cavium_cfg_probe(struct arm_smmu_device *smmu)
return 0;
 }

-static int cavium_init_context(struct arm_smmu_domain *smmu_domain)
+static int cavium_init_context(struct arm_smmu_domain *smmu_domain,
+   struct io_pgtable_cfg *pgtbl_cfg)
 {
struct cavium_smmu *cs = container_of(smmu_domain->smmu,
  struct cavium_smmu, smmu);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index eee48f9..4f7e0c0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -758,11 +758,6 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
cfg->asid = cfg->cbndx;

smmu_domain->smmu = smmu;
-   if (smmu->impl && smmu->impl->init_context) {
-   ret = smmu->impl->init_context(smmu_domain);
-   if (ret)
-   goto out_unlock;
-   }

smmu_domain->pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap  = smmu->pgsize_bitmap,
@@ -773,6 +768,12 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
.iommu_dev  = smmu->dev,
};

+   if (smmu->impl && smmu->impl->init_context) {
+   ret = smmu->impl->init_context(smmu_domain, 
_domain->pgtbl_cfg);
+   if (ret)
+   goto out_unlock;
+   }
+
if (smmu_domain->non_strict)
smmu_domain->pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index b2df38c..f57cdbe 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -335,7 +335,8 @@ struct arm_smmu_impl {
u64 val);
int (*cfg_probe)(struct arm_smmu_device *smmu);
int (*reset)(struct arm_smmu_device *smmu);
-   int (*init_context)(struct arm_smmu_domain *smmu_domain);
+   int (*init_context)(struct arm_smmu_domain *smmu_domain,
+   struct io_pgtable_cfg *pgtbl_cfg);
 };

 static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
--
1.9.1
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 0/5] drm/msm/a6xx: System Cache Support

2019-12-19 Thread Sharat Masetty
Some hardware variants contain a system level cache, or last level
cache (LLC). This cache is typically a large block which is shared by multiple
clients on the SoC. The GPU uses the system cache to cache both the GPU data
buffers (like textures) as well as the SMMU pagetables. This helps with
improved render performance as well as lower power consumption by reducing
the bus traffic to the system memory.

The system cache architecture allows the cache to be split into slices which
can then be used by multiple SoC clients. This patch series is an effort to enable
and use two of those slices preallocated for the GPU, one for the GPU data
buffers and another for the GPU SMMU hardware pagetables.

To enable the system cache driver, add [1] to your stack if not already
present. Please review.

[1] https://lore.kernel.org/patchwork/patch/1165298/

Jordan Crouse (1):
  iommu/arm-smmu: Pass io_pgtable_cfg to impl specific init_context

Sharat Masetty (3):
  drm/msm: rearrange the gpu_rmw() function
  drm/msm: Pass mmu features to generic layers
  drm/msm/a6xx: Add support for using system cache(LLC)

Vivek Gautam (1):
  iommu/arm-smmu: Add domain attribute for QCOM system cache

 drivers/gpu/drm/msm/adreno/a2xx_gpu.c   |   2 +-
 drivers/gpu/drm/msm/adreno/a3xx_gpu.c   |   2 +-
 drivers/gpu/drm/msm/adreno/a4xx_gpu.c   |   2 +-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c   |   2 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 122 +++-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |   9 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c |   4 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |   2 +-
 drivers/gpu/drm/msm/msm_drv.c   |   8 +++
 drivers/gpu/drm/msm/msm_drv.h   |   1 +
 drivers/gpu/drm/msm/msm_gpu.c   |   6 +-
 drivers/gpu/drm/msm/msm_gpu.h   |   6 +-
 drivers/gpu/drm/msm/msm_iommu.c |  13 
 drivers/gpu/drm/msm/msm_mmu.h   |  14 
 drivers/iommu/arm-smmu-impl.c   |   3 +-
 drivers/iommu/arm-smmu-qcom.c   |  10 +++
 drivers/iommu/arm-smmu.c|  25 +--
 drivers/iommu/arm-smmu.h|   4 +-
 include/linux/iommu.h   |   1 +
 19 files changed, 216 insertions(+), 20 deletions(-)

--
1.9.1
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 3/5] drm/msm: rearrange the gpu_rmw() function

2019-12-19 Thread Sharat Masetty
The register read-modify-write construct is generic enough
that it can be used by other subsystems as needed. Create
a more generic rmw() function and have gpu_rmw() use
this new function.
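
A quick illustrative use of the new helper (the register offset and field
are invented for this example): clear the bits in 'mask' and OR in 'or'
without disturbing the rest of the register:

    /* update a 5-bit field at bits [14:10] of a memory-mapped register;
     * GENMASK() comes from <linux/bits.h> */
    static void example_set_field(void __iomem *base, u32 reg, u32 val)
    {
            msm_rmw(base + (reg << 2), GENMASK(14, 10), (val & 0x1f) << 10);
    }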

Signed-off-by: Sharat Masetty 
Reviewed-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/msm_drv.c | 8 
 drivers/gpu/drm/msm/msm_drv.h | 1 +
 drivers/gpu/drm/msm/msm_gpu.h | 5 +
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index f50fefb..4c4559f 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -165,6 +165,14 @@ u32 msm_readl(const void __iomem *addr)
return val;
 }
 
+void msm_rmw(void __iomem *addr, u32 mask, u32 or)
+{
+   u32 val = msm_readl(addr);
+
+   val &= ~mask;
+   msm_writel(val | or, addr);
+}
+
 struct msm_vblank_work {
struct work_struct work;
int crtc_id;
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 71547e7..997729e 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -409,6 +409,7 @@ void __iomem *msm_ioremap(struct platform_device *pdev, 
const char *name,
const char *dbgname);
 void msm_writel(u32 data, void __iomem *addr);
 u32 msm_readl(const void __iomem *addr);
+void msm_rmw(void __iomem *addr, u32 mask, u32 or);
 
 struct msm_gpu_submitqueue;
 int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index ab8f0f9c..a58ef16 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -223,10 +223,7 @@ static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
 
 static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
 {
-   uint32_t val = gpu_read(gpu, reg);
-
-   val &= ~mask;
-   gpu_write(gpu, reg, val | or);
+   msm_rmw(gpu->mmio + (reg << 2), mask, or);
 }
 
 static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
-- 
1.9.1
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/5] iommu/arm-smmu: Add domain attribute for QCOM system cache

2019-12-19 Thread Sharat Masetty
From: Vivek Gautam 

Add an iommu domain attribute for using the system cache, aka last level
cache, on QCOM SoCs. Client drivers like the GPU can set this attribute to
get the right attributes for caching the hardware pagetables into the
system cache.

Signed-off-by: Vivek Gautam 
Co-developed-by: Sai Prakash Ranjan 
Signed-off-by: Sai Prakash Ranjan 
---
 drivers/iommu/arm-smmu-qcom.c | 10 ++
 drivers/iommu/arm-smmu.c  | 14 ++
 drivers/iommu/arm-smmu.h  |  1 +
 include/linux/iommu.h |  1 +
 4 files changed, 26 insertions(+)

diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
index 24c071c..d1d22df 100644
--- a/drivers/iommu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm-smmu-qcom.c
@@ -30,7 +30,17 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device 
*smmu)
return ret;
 }
 
+static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
+ struct io_pgtable_cfg *pgtbl_cfg)
+{
+   if (smmu_domain->sys_cache)
+   pgtbl_cfg->coherent_walk = false;
+
+   return 0;
+}
+
 static const struct arm_smmu_impl qcom_smmu_impl = {
+   .init_context = qcom_smmu_init_context,
.reset = qcom_sdm845_smmu500_reset,
 };
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 4f7e0c0..055b548 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1466,6 +1466,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
case DOMAIN_ATTR_NESTING:
*(int *)data = (smmu_domain->stage == 
ARM_SMMU_DOMAIN_NESTED);
return 0;
+   case DOMAIN_ATTR_QCOM_SYS_CACHE:
+   *((int *)data) = smmu_domain->sys_cache;
+   return 0;
default:
return -ENODEV;
}
@@ -1506,6 +1509,17 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
*domain,
else
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
break;
+   case DOMAIN_ATTR_QCOM_SYS_CACHE:
+   if (smmu_domain->smmu) {
+   ret = -EPERM;
+   goto out_unlock;
+   }
+
+   if (*((int *)data))
+   smmu_domain->sys_cache = true;
+   else
+   smmu_domain->sys_cache = false;
+   break;
default:
ret = -ENODEV;
}
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index f57cdbe..8aeaaf0 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -322,6 +322,7 @@ struct arm_smmu_domain {
struct mutexinit_mutex; /* Protects smmu pointer */
spinlock_t  cb_lock; /* Serialises ATS1* ops and 
TLB syncs */
struct iommu_domain domain;
+   boolsys_cache;
 };
 
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0c60e75..bd61c60 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -127,6 +127,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING,/* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+   DOMAIN_ATTR_QCOM_SYS_CACHE,
DOMAIN_ATTR_MAX,
 };
 
-- 
1.9.1
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH] arm: dts: sc7180: Add A618 gpu dt blob

2019-12-03 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 116 +++
 1 file changed, 116 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index c3db2e5..31223d0 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -18,6 +18,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 / {
interrupt-parent = <>;
@@ -733,6 +735,120 @@
#power-domain-cells = <1>;
};

+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x500 0 0x4>, <0 0x509e000 0 0x1000>,
+   <0 0x5061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+
+   interrupts = ;
+
+   iommus = <_smmu 0>;
+
+   operating-points-v2 = <_opp_table>;
+
+   interconnects = <_noc 35 _virt 512>;
+
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>,
+   < GCC_DDRSS_GPU_AXI_CLK>;
+
+   clock-names = "bus", "iface", "mem_iface_clk";
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618", "qcom,adreno-gmu";
+
+   reg =   <0 0x506a000 0 0x31000>,
+   <0 0xb29 0 0x1>,
+   <0 0xb49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DDRSS_GPU_AXI_CLK>,
+  < GCC_GPU_MEM

[PATCH 5/5] arm: dts: sc7180: Add A618 gpu dt blob

2019-12-03 Thread Sharat Masetty
This patch adds the required dt nodes and properties
to enable the A618 GPU.

Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 116 +++
 1 file changed, 116 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index c3db2e5..31223d0 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -18,6 +18,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 

 / {
interrupt-parent = <>;
@@ -733,6 +735,120 @@
#power-domain-cells = <1>;
};

+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x500 0 0x4>, <0 0x509e000 0 0x1000>,
+   <0 0x5061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+
+   interrupts = ;
+
+   iommus = <_smmu 0>;
+
+   operating-points-v2 = <_opp_table>;
+
+   interconnects = <_noc 35 _virt 512>;
+
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>,
+   < GCC_DDRSS_GPU_AXI_CLK>;
+
+   clock-names = "bus", "iface", "mem_iface_clk";
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618", "qcom,adreno-gmu";
+
+   reg =   <0 0x506a000 0 0x31000>,
+   <0 0xb29 0 0x1>,
+   <0 0xb49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DDRSS_GPU_AXI_CLK>,
+  < GCC_GPU_MEM

[PATCH 0/5] Add support for A618 GPU

2019-12-03 Thread Sharat Masetty
This series adds support for the A618 GPU. Please review.

Sharat Masetty (5):
  drm: msm: Add 618 gpu to the adreno gpu list
  drm: msm: a6xx: Add support for A618
  drm: msm: a6xx: Dump GBIF registers, debugbus in gpu state
  drm: msm: a6xx: fix debug bus register configuration
  arm: dts: sc7180: Add A618 gpu dt blob

 arch/arm64/boot/dts/qcom/sc7180.dtsi| 116 
 drivers/gpu/drm/msm/adreno/a6xx.xml.h   |  52 -
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c   |  24 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   |  70 +++--
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |   9 ++-
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c |  76 --
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h |  16 +++-
 drivers/gpu/drm/msm/adreno/adreno_device.c  |  11 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  12 ++-
 9 files changed, 348 insertions(+), 38 deletions(-)

--
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH 1/5] drm: msm: Add 618 gpu to the adreno gpu list

2019-12-03 Thread Sharat Masetty
This patch adds an Adreno 618 entry and its associated properties
to the gpulist.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_device.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 0888e0d..e728ea5 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -152,6 +152,17 @@
.init = a5xx_gpu_init,
.zapfw = "a540_zap.mdt",
}, {
+   .rev = ADRENO_REV(6, 1, 8, ANY_ID),
+   .revn = 618,
+   .name = "A618",
+   .fw = {
+   [ADRENO_FW_SQE] = "a630_sqe.fw",
+   [ADRENO_FW_GMU] = "a630_gmu.bin",
+   },
+   .gmem = SZ_512K,
+   .inactive_period = DRM_MSM_INACTIVE_PERIOD,
+   .init = a6xx_gpu_init,
+   }, {
.rev = ADRENO_REV(6, 3, 0, ANY_ID),
.revn = 630,
.name = "A630",
--
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH 4/5] drm: msm: a6xx: fix debug bus register configuration

2019-12-03 Thread Sharat Masetty
Fix the CX debugbus related register configuration to collect accurate
bus data during GPU snapshot. This helps with a complete snapshot dump
and also with proper GPU recovery.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 99b5a41..d6023ba 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -353,26 +353,26 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
cxdbg = ioremap(res->start, resource_size(res));

if (cxdbg) {
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
0x76543210);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
0xFEDCBA98);

-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
}

nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
--
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH 3/5] drm: msm: a6xx: Dump GBIF registers, debugbus in gpu state

2019-12-03 Thread Sharat Masetty
Add the relevant GBIF registers and the debug bus to the a6xx gpu
state. This comes in pretty handy when debugging GPU bus related
issues.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 52 +++--
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 16 +++--
 2 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index e686331..99b5a41 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */

 #include 
 #include "msm_gem.h"
@@ -320,6 +320,7 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
 {
struct resource *res;
void __iomem *cxdbg = NULL;
+   int nr_debugbus_blocks;

/* Set up the GX debug bus */

@@ -374,9 +375,11 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
}

-   a6xx_state->debugbus = state_kcalloc(a6xx_state,
-   ARRAY_SIZE(a6xx_debugbus_blocks),
-   sizeof(*a6xx_state->debugbus));
+   nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
+   (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
+
+   a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
+   sizeof(*a6xx_state->debugbus));

if (a6xx_state->debugbus) {
int i;
@@ -388,15 +391,31 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
_state->debugbus[i]);

a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
+
+   /*
+* GBIF has same debugbus as of other GPU blocks, fall back to
+* default path if GPU uses GBIF, also GBIF uses exactly same
+* ID as of VBIF.
+*/
+   if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
+   a6xx_get_debugbus_block(gpu, a6xx_state,
+   _gbif_debugbus_block,
+   _state->debugbus[i]);
+
+   a6xx_state->nr_debugbus += 1;
+   }
}

-   a6xx_state->vbif_debugbus =
-   state_kcalloc(a6xx_state, 1,
-   sizeof(*a6xx_state->vbif_debugbus));
+   /*  Dump the VBIF debugbus on applicable targets */
+   if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
+   a6xx_state->vbif_debugbus =
+   state_kcalloc(a6xx_state, 1,
+   sizeof(*a6xx_state->vbif_debugbus));

-   if (a6xx_state->vbif_debugbus)
-   a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
-   a6xx_state->vbif_debugbus);
+   if (a6xx_state->vbif_debugbus)
+   a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
+   a6xx_state->vbif_debugbus);
+   }

if (cxdbg) {
a6xx_state->cx_debugbus =
@@ -770,14 +789,16 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
_state->gmu_registers[1]);
 }

+#define A6XX_GBIF_REGLIST_SIZE   1
 static void a6xx_get_registers(struct msm_gpu *gpu,
struct a6xx_gpu_state *a6xx_state,
struct a6xx_crashdumper *dumper)
 {
int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
ARRAY_SIZE(a6xx_reglist) +
-   ARRAY_SIZE(a6xx_hlsq_reglist);
+   ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
int index = 0;
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

a6xx_state->registers = state_kcalloc(a6xx_state,
count, sizeof(*a6xx_state->registers));
@@ -792,6 +813,15 @@ static void a6xx_get_registers(struct msm_gpu *gpu,
a6xx_state, _ahb_reglist[i],
_state->registers[index++]);

+   if (a6xx_has_gbif(adreno_gpu))
+   a6xx_get_ahb_gpu_registers(gpu,
+   a6xx_state, _gbif_reglist,
+   _state->registers[index++]);
+   else
+   a6xx_get_ahb_gpu_registers(gpu,
+   a6xx_state, _vbif_reglist,
+   _state->registers[index++]);
+
for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
a6xx_get_crashdumper_registers(gpu,
a6xx_state, _reglist[i],
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
index 68cccfa..e67c20c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state

[PATCH 2/5] drm: msm: a6xx: Add support for A618

2019-12-03 Thread Sharat Masetty
This patch adds support for enabling the Graphics Bus Interface (GBIF)
used in multiple A6xx series chipsets. It also makes changes to the
PDC/RSC sequencing specifically required for A618. This is needed
for proper interfacing with RPMH.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx.xml.h   | 52 +++-
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c   | 24 +--
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 70 ++---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |  9 -
 drivers/gpu/drm/msm/adreno/adreno_gpu.h | 12 +-
 5 files changed, 154 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx.xml.h 
b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
index f44553e..ed78fee 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
@@ -16,11 +16,11 @@
 - /home/robclark/src/envytools/rnndb/adreno/a3xx.xml  (  83840 bytes, 
from 2018-07-03 19:37:13)
 - /home/robclark/src/envytools/rnndb/adreno/a4xx.xml  ( 112086 bytes, 
from 2018-07-03 19:37:13)
 - /home/robclark/src/envytools/rnndb/adreno/a5xx.xml  ( 147240 bytes, 
from 2018-12-02 17:29:54)
-- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml  ( 140790 bytes, 
from 2018-12-02 17:29:54)
+- /home/smasetty/playarea/envytools/rnndb/adreno/a6xx.xml ( 161969 bytes, 
from 2019-11-29 07:18:16)
 - /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml  (  10431 bytes, 
from 2018-09-14 13:03:07)
 - /home/robclark/src/envytools/rnndb/adreno/ocmem.xml (   1773 bytes, 
from 2018-07-03 19:37:13)

-Copyright (C) 2013-2018 by the following authors:
+Copyright (C) 2013-2019 by the following authors:
 - Rob Clark  (robclark)
 - Ilia Mirkin  (imirkin)

@@ -2519,6 +2519,54 @@ static inline uint32_t 
A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val)

 #define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2   0x311a

+#define REG_A6XX_GBIF_SCACHE_CNTL1 0x3c02
+
+#define REG_A6XX_GBIF_QSB_SIDE0
0x3c03
+
+#define REG_A6XX_GBIF_QSB_SIDE1
0x3c04
+
+#define REG_A6XX_GBIF_QSB_SIDE2
0x3c05
+
+#define REG_A6XX_GBIF_QSB_SIDE3
0x3c06
+
+#define REG_A6XX_GBIF_HALT 0x3c45
+
+#define REG_A6XX_GBIF_HALT_ACK 0x3c46
+
+#define REG_A6XX_GBIF_PERF_PWR_CNT_EN  0x3cc0
+
+#define REG_A6XX_GBIF_PERF_CNT_SEL 0x3cc2
+
+#define REG_A6XX_GBIF_PERF_PWR_CNT_SEL 0x3cc3
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW00x3cc4
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW10x3cc5
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW20x3cc6
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW30x3cc7
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH0   0x3cc8
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH1   0x3cc9
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH2   0x3cca
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH3   0x3ccb
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW0 0x3ccc
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW1 0x3ccd
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW2 0x3cce
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH00x3ccf
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH10x3cd0
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH20x3cd1
+
 #define REG_A6XX_RB_WINDOW_OFFSET2 0x88d4
 #define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE   0x8000
 #define A6XX_RB_WINDOW_OFFSET2_X__MASK 0x7fff
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 85f14fe..158a74c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */

 #include 
 #include 
@@ -433,6 +433,8 @@ static void __iomem *a6xx_gmu_get_mmio(struct 
platform_device *pdev,

 static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 {
+   struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+   struct adreno_gpu *adreno_gpu = _gpu->base;
struct platform_device *pdev = to_platform_device(gmu->dev);
void __iomem *pdcptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc");
void __iomem *seqptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc_seq");
@@ -480

[PATCH 3/5] drm: msm: a6xx: Dump GBIF registers, debugbus in gpu state

2019-12-03 Thread Sharat Masetty
Add the relevant GBIF registers and the debug bus to the a6xx gpu
state. This comes in pretty handy when debugging GPU bus related
issues.

Change-Id: I224fda727012a456ccd28ca14caf9fcce236e629
Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 52 +++--
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 16 +++--
 2 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index e686331..99b5a41 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
 
 #include 
 #include "msm_gem.h"
@@ -320,6 +320,7 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
 {
struct resource *res;
void __iomem *cxdbg = NULL;
+   int nr_debugbus_blocks;
 
/* Set up the GX debug bus */
 
@@ -374,9 +375,11 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
}
 
-   a6xx_state->debugbus = state_kcalloc(a6xx_state,
-   ARRAY_SIZE(a6xx_debugbus_blocks),
-   sizeof(*a6xx_state->debugbus));
+   nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
+   (a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
+
+   a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
+   sizeof(*a6xx_state->debugbus));
 
if (a6xx_state->debugbus) {
int i;
@@ -388,15 +391,31 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
_state->debugbus[i]);
 
a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
+
+   /*
+* GBIF has same debugbus as of other GPU blocks, fall back to
+* default path if GPU uses GBIF, also GBIF uses exactly same
+* ID as of VBIF.
+*/
+   if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
+   a6xx_get_debugbus_block(gpu, a6xx_state,
+   _gbif_debugbus_block,
+   _state->debugbus[i]);
+
+   a6xx_state->nr_debugbus += 1;
+   }
}
 
-   a6xx_state->vbif_debugbus =
-   state_kcalloc(a6xx_state, 1,
-   sizeof(*a6xx_state->vbif_debugbus));
+   /*  Dump the VBIF debugbus on applicable targets */
+   if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
+   a6xx_state->vbif_debugbus =
+   state_kcalloc(a6xx_state, 1,
+   sizeof(*a6xx_state->vbif_debugbus));
 
-   if (a6xx_state->vbif_debugbus)
-   a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
-   a6xx_state->vbif_debugbus);
+   if (a6xx_state->vbif_debugbus)
+   a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
+   a6xx_state->vbif_debugbus);
+   }
 
if (cxdbg) {
a6xx_state->cx_debugbus =
@@ -770,14 +789,16 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
_state->gmu_registers[1]);
 }
 
+#define A6XX_GBIF_REGLIST_SIZE   1
 static void a6xx_get_registers(struct msm_gpu *gpu,
struct a6xx_gpu_state *a6xx_state,
struct a6xx_crashdumper *dumper)
 {
int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
ARRAY_SIZE(a6xx_reglist) +
-   ARRAY_SIZE(a6xx_hlsq_reglist);
+   ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
int index = 0;
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 
a6xx_state->registers = state_kcalloc(a6xx_state,
count, sizeof(*a6xx_state->registers));
@@ -792,6 +813,15 @@ static void a6xx_get_registers(struct msm_gpu *gpu,
a6xx_state, _ahb_reglist[i],
_state->registers[index++]);
 
+   if (a6xx_has_gbif(adreno_gpu))
+   a6xx_get_ahb_gpu_registers(gpu,
+   a6xx_state, _gbif_reglist,
+   _state->registers[index++]);
+   else
+   a6xx_get_ahb_gpu_registers(gpu,
+   a6xx_state, _vbif_reglist,
+   _state->registers[index++]);
+
for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
a6xx_get_crashdumper_registers(gpu,
a6xx_state, _reglist[i],
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
index 68cccfa..e67c2

[PATCH 0/5] Add support for A618 GPU

2019-12-03 Thread Sharat Masetty
This series adds support to enable the A618 GPU revision. Please review.

Sharat Masetty (5):
  drm: msm: Add 618 gpu to the adreno gpu list
  drm: msm: a6xx: Add support for A618
  drm: msm: a6xx: Dump GBIF registers, debugbus in gpu state
  drm: msm: a6xx: fix debug bus register configuration
  arm: dts: sc7180: Add A618 gpu dt blob

 arch/arm64/boot/dts/qcom/sc7180.dtsi| 116 
 drivers/gpu/drm/msm/adreno/a6xx.xml.h   |  52 -
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c   |  24 +-
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   |  70 +++--
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |   9 ++-
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c |  76 --
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h |  16 +++-
 drivers/gpu/drm/msm/adreno/adreno_device.c  |  11 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.h |  12 ++-
 9 files changed, 348 insertions(+), 38 deletions(-)

--
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH 2/5] drm: msm: a6xx: Add support for A618

2019-12-03 Thread Sharat Masetty
This patch adds support for enabling the Graphics Bus Interface (GBIF)
used in multiple A6xx series chipsets. It also makes changes to the
PDC/RSC sequencing specifically required for A618. This is needed
for proper interfacing with RPMH.

Change-Id: I0e7a314b0ae0d562f602512ad978f5d1ced5fb26
Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx.xml.h   | 52 +++-
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c   | 24 +--
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 70 ++---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h   |  9 -
 drivers/gpu/drm/msm/adreno/adreno_gpu.h | 12 +-
 5 files changed, 154 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx.xml.h 
b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
index f44553e..ed78fee 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx.xml.h
@@ -16,11 +16,11 @@
 - /home/robclark/src/envytools/rnndb/adreno/a3xx.xml  (  83840 bytes, 
from 2018-07-03 19:37:13)
 - /home/robclark/src/envytools/rnndb/adreno/a4xx.xml  ( 112086 bytes, 
from 2018-07-03 19:37:13)
 - /home/robclark/src/envytools/rnndb/adreno/a5xx.xml  ( 147240 bytes, 
from 2018-12-02 17:29:54)
-- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml  ( 140790 bytes, 
from 2018-12-02 17:29:54)
+- /home/smasetty/playarea/envytools/rnndb/adreno/a6xx.xml ( 161969 bytes, 
from 2019-11-29 07:18:16)
 - /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml  (  10431 bytes, 
from 2018-09-14 13:03:07)
 - /home/robclark/src/envytools/rnndb/adreno/ocmem.xml (   1773 bytes, 
from 2018-07-03 19:37:13)
 
-Copyright (C) 2013-2018 by the following authors:
+Copyright (C) 2013-2019 by the following authors:
 - Rob Clark  (robclark)
 - Ilia Mirkin  (imirkin)
 
@@ -2519,6 +2519,54 @@ static inline uint32_t 
A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val)
 
 #define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2   0x311a
 
+#define REG_A6XX_GBIF_SCACHE_CNTL1 0x3c02
+
+#define REG_A6XX_GBIF_QSB_SIDE0
0x3c03
+
+#define REG_A6XX_GBIF_QSB_SIDE1
0x3c04
+
+#define REG_A6XX_GBIF_QSB_SIDE2
0x3c05
+
+#define REG_A6XX_GBIF_QSB_SIDE3
0x3c06
+
+#define REG_A6XX_GBIF_HALT 0x3c45
+
+#define REG_A6XX_GBIF_HALT_ACK 0x3c46
+
+#define REG_A6XX_GBIF_PERF_PWR_CNT_EN  0x3cc0
+
+#define REG_A6XX_GBIF_PERF_CNT_SEL 0x3cc2
+
+#define REG_A6XX_GBIF_PERF_PWR_CNT_SEL 0x3cc3
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW00x3cc4
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW10x3cc5
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW20x3cc6
+
+#define REG_A6XX_GBIF_PERF_CNT_LOW30x3cc7
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH0   0x3cc8
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH1   0x3cc9
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH2   0x3cca
+
+#define REG_A6XX_GBIF_PERF_CNT_HIGH3   0x3ccb
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW0 0x3ccc
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW1 0x3ccd
+
+#define REG_A6XX_GBIF_PWR_CNT_LOW2 0x3cce
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH00x3ccf
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH10x3cd0
+
+#define REG_A6XX_GBIF_PWR_CNT_HIGH20x3cd1
+
 #define REG_A6XX_RB_WINDOW_OFFSET2 0x88d4
 #define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE   0x8000
 #define A6XX_RB_WINDOW_OFFSET2_X__MASK 0x7fff
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 85f14fe..158a74c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2017-2018 The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
 
 #include 
 #include 
@@ -433,6 +433,8 @@ static void __iomem *a6xx_gmu_get_mmio(struct 
platform_device *pdev,
 
 static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
 {
+   struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+   struct adreno_gpu *adreno_gpu = _gpu->base;
struct platform_device *pdev = to_platform_device(gmu->dev);
void __iomem *pdcptr = a6xx_gmu_get_mmio(pdev, "gmu_pdc");
void __iomem *seqptr = a

[PATCH 4/5] drm: msm: a6xx: fix debug bus register configuration

2019-12-03 Thread Sharat Masetty
Fix the CX debugbus related register configuration to collect accurate
bus data during a GPU snapshot. This helps capture a complete snapshot
dump and also enables proper GPU recovery.
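
For context, the CX_DBGC block lives in its own "cx_dbgc" register region
(exposed via reg-names in the GPU DT node added by the sc7180 patch of this
series) and is mapped separately from the main GPU registers before the
writes below are issued. A minimal sketch of that mapping, with the helper
name being hypothetical:

static void __iomem *example_map_cx_dbgc(struct platform_device *pdev)
{
        struct resource *res;

        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cx_dbgc");
        if (!res)
                return NULL;

        return ioremap(res->start, resource_size(res));
}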

Change-Id: I4f0ae3eb2dd5d24a88d805277fad212dda2d735e
Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 99b5a41..d6023ba 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -353,26 +353,26 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
cxdbg = ioremap(res->start, resource_size(res));
 
if (cxdbg) {
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
0x76543210);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
0xFEDCBA98);
 
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
}
 
nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH 1/5] drm: msm: Add 618 gpu to the adreno gpu list

2019-12-03 Thread Sharat Masetty
This patch adds the Adreno 618 entry and its associated properties
to the gpulist.

Change-Id: Ie14ba09f32513ba6a6c882fda0d98ee1742b46d5
Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_device.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c 
b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 0888e0d..e728ea5 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -152,6 +152,17 @@
.init = a5xx_gpu_init,
.zapfw = "a540_zap.mdt",
}, {
+   .rev = ADRENO_REV(6, 1, 8, ANY_ID),
+   .revn = 618,
+   .name = "A618",
+   .fw = {
+   [ADRENO_FW_SQE] = "a630_sqe.fw",
+   [ADRENO_FW_GMU] = "a630_gmu.bin",
+   },
+   .gmem = SZ_512K,
+   .inactive_period = DRM_MSM_INACTIVE_PERIOD,
+   .init = a6xx_gpu_init,
+   }, {
.rev = ADRENO_REV(6, 3, 0, ANY_ID),
.revn = 630,
.name = "A630",
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH 5/5] arm: dts: sc7180: Add A618 gpu dt blob

2019-12-03 Thread Sharat Masetty
This patch adds the required DT nodes and properties
to enable the A618 GPU.

Change-Id: I7491c4de654c4b84d03dbcf703532448b27d4147
Signed-off-by: Sharat Masetty 
---
 arch/arm64/boot/dts/qcom/sc7180.dtsi | 116 +++
 1 file changed, 116 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi 
b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index c3db2e5..31223d0 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -18,6 +18,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 / {
interrupt-parent = <>;
@@ -733,6 +735,120 @@
#power-domain-cells = <1>;
};
 
+   gpu: gpu@500 {
+   compatible = "qcom,adreno-618.0", "qcom,adreno";
+   #stream-id-cells = <16>;
+   reg = <0 0x500 0 0x4>, <0 0x509e000 0 0x1000>,
+   <0 0x5061000 0 0x800>;
+   reg-names = "kgsl_3d0_reg_memory", "cx_mem", "cx_dbgc";
+
+   interrupts = ;
+
+   iommus = <_smmu 0>;
+
+   operating-points-v2 = <_opp_table>;
+
+   interconnects = <_noc 35 _virt 512>;
+
+   qcom,gmu = <>;
+
+   gpu_opp_table: opp-table {
+   compatible = "operating-points-v2";
+
+   opp-8 {
+   opp-hz = /bits/ 64 <8>;
+   opp-level = 
;
+   };
+
+   opp-65000 {
+   opp-hz = /bits/ 64 <65000>;
+   opp-level = 
;
+   };
+
+   opp-56500 {
+   opp-hz = /bits/ 64 <56500>;
+   opp-level = ;
+   };
+
+   opp-43000 {
+   opp-hz = /bits/ 64 <43000>;
+   opp-level = 
;
+   };
+
+opp-35500 {
+   opp-hz = /bits/ 64 <35500>;
+   opp-level = ;
+   };
+
+opp-26700 {
+   opp-hz = /bits/ 64 <26700>;
+   opp-level = 
;
+   };
+
+   opp-18000 {
+   opp-hz = /bits/ 64 <18000>;
+   opp-level = 
;
+   };
+   };
+   };
+
+   adreno_smmu: iommu@504 {
+   compatible = "qcom,sc7180-smmu-v2", "qcom,smmu-v2";
+   reg = <0 0x504 0 0x1>;
+   #iommu-cells = <1>;
+   #global-interrupts = <2>;
+   interrupts = ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ,
+   ;
+   clocks = < GCC_GPU_MEMNOC_GFX_CLK>,
+   < GCC_GPU_CFG_AHB_CLK>,
+   < GCC_DDRSS_GPU_AXI_CLK>;
+
+   clock-names = "bus", "iface", "mem_iface_clk";
+   power-domains = < CX_GDSC>;
+   };
+
+   gmu: gmu@506a000 {
+   compatible="qcom,adreno-gmu-618", "qcom,adreno-gmu";
+
+   reg =   <0 0x506a000 0 0x31000>,
+   <0 0xb29 0 0x1>,
+   <0 0xb49 0 0x1>;
+   reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
+
+   interrupts = ,
+  ;
+   interrupt-names = "hfi", "gmu";
+
+   clocks = < GPU_CC_CX_GMU_CLK>,
+  < GPU_CC_CXO_CLK>,
+  < GCC_DD

[PATCH] rnndb: Add GBIF registers for a6xx GPU

2019-11-28 Thread Sharat Masetty
Add the GBIF register definitions required to support the A618
GPU revision.

Signed-off-by: Sharat Masetty 
---
 rnndb/adreno/a6xx.xml | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/rnndb/adreno/a6xx.xml b/rnndb/adreno/a6xx.xml
index 747f071..2d2063a 100644
--- a/rnndb/adreno/a6xx.xml
+++ b/rnndb/adreno/a6xx.xml
@@ -1748,6 +1748,32 @@ to upconvert to 32b float internally?


 
+   
+   
+   
+   
+   
+   
+   
+   
+
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   
+   

 

-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH] drm: msm: a6xx: fix debug bus register configuration

2019-11-06 Thread Sharat Masetty
Fix the CX debugbus related register configuration to collect accurate
bus data during a GPU snapshot. This helps capture a complete snapshot
dump and also enables proper GPU recovery.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 483e100..c5764b4 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -353,26 +353,26 @@ static void a6xx_get_debugbus(struct msm_gpu *gpu,
cxdbg = ioremap(res->start, resource_size(res));

if (cxdbg) {
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
0x76543210);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
0xFEDCBA98);

-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
-   cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
+   cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
}

nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
--
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH] drm/msm/a5xx: Build a5xx_gpu_state_(get/put) under the right conditionals

2018-12-11 Thread Sharat Masetty
Build the GPU crashstate capture functions only if either
CONFIG_DEBUG_FS or CONFIG_DEV_COREDUMP is defined.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index d5f5e56..81014b5 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1328,6 +1328,7 @@ static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu 
*gpu,
msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
 }
 
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
 {
struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
@@ -1373,8 +1374,6 @@ int a5xx_gpu_state_put(struct msm_gpu_state *state)
return kref_put(>ref, a5xx_gpu_state_destroy);
 }
 
-
-#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
struct drm_printer *p)
 {
@@ -1456,13 +1455,13 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
.destroy = a5xx_destroy,
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = a5xx_show,
+   .gpu_state_get = a5xx_gpu_state_get,
+   .gpu_state_put = a5xx_gpu_state_put,
 #endif
 #if defined(CONFIG_DEBUG_FS)
.debugfs_init = a5xx_debugfs_init,
 #endif
.gpu_busy = a5xx_gpu_busy,
-   .gpu_state_get = a5xx_gpu_state_get,
-   .gpu_state_put = a5xx_gpu_state_put,
},
.get_timestamp = a5xx_get_timestamp,
 };
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[v2 1/2] drm/msm/adreno: Make adreno_gpu_state_get() return void

2018-12-11 Thread Sharat Masetty
We are not really checking the return value of adreno_gpu_state_get()
at the callers, and in addition the state capture is mostly a
best-effort service, so make the function return void.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 +---
 drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 1ca4bea..40bcf32 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -380,7 +380,7 @@ bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer 
*ring)
return false;
 }
 
-int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
+void adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int i, count = 0;
@@ -437,8 +437,6 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct 
msm_gpu_state *state)
 
state->nr_registers = count;
}
-
-   return 0;
 }
 
 void adreno_gpu_state_destroy(struct msm_gpu_state *state)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 4973c8c..d4834b3 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -235,7 +235,7 @@ int adreno_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,
 
 void adreno_gpu_state_destroy(struct msm_gpu_state *state);
 
-int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state);
+void adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state);
 int adreno_gpu_state_put(struct msm_gpu_state *state);
 
 /* ringbuffer helpers (the parts that are adreno specific) */
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[v2 2/2] drm/msm/a6xx: Fix NULL dereference during crashstate capture

2018-12-11 Thread Sharat Masetty
The GPU crashstate's base object's registers pointer can be NULL if the
target implementation decides to capture the register dump on its own.
This patch simply checks for NULL before dereferencing it.

Signed-off-by: Sharat Masetty 
---
Changes from v1:
Addressed comments from Jordan Crouse

 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 40bcf32..56a63c4 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -415,6 +415,9 @@ void adreno_gpu_state_get(struct msm_gpu *gpu, struct 
msm_gpu_state *state)
}
}

+   if (!adreno_gpu->registers)
+   return;
+
/* Count the number of registers */
for (i = 0; adreno_gpu->registers[i] != ~0; i += 2)
count += adreno_gpu->registers[i + 1] -
@@ -550,9 +553,10 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state 
*state,
}
}

-   drm_puts(p, "registers:\n");
-
for (i = 0; i < state->nr_registers; i++) {
+   if (i == 0)
+   drm_puts(p, "registers:\n");
+
drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
state->registers[i * 2] << 2,
state->registers[(i * 2) + 1]);
--
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/2] drm/msm/a6xx: Fix NULL dereference during crashstate capture

2018-12-10 Thread Sharat Masetty
The GPU crashstate's base object's registers pointer can be NULL if the
target implementation decides to capture the register dump on its own.
This patch simply checks for NULL before dereferencing it.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 40bcf32..a39cebc 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -415,6 +415,9 @@ void adreno_gpu_state_get(struct msm_gpu *gpu, struct 
msm_gpu_state *state)
}
}
 
+   if (!adreno_gpu->registers)
+   return;
+
/* Count the number of registers */
for (i = 0; adreno_gpu->registers[i] != ~0; i += 2)
count += adreno_gpu->registers[i + 1] -
@@ -550,12 +553,14 @@ void adreno_show(struct msm_gpu *gpu, struct 
msm_gpu_state *state,
}
}
 
-   drm_puts(p, "registers:\n");
+   if (state->nr_registers > 0) {
+   drm_puts(p, "registers:\n");
 
-   for (i = 0; i < state->nr_registers; i++) {
-   drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
-   state->registers[i * 2] << 2,
-   state->registers[(i * 2) + 1]);
+   for (i = 0; i < state->nr_registers; i++) {
+   drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
+   state->registers[i * 2] << 2,
+   state->registers[(i * 2) + 1]);
+   }
}
 }
 #endif
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 1/2] drm/msm/adreno: Make adreno_gpu_state_get() return void

2018-12-10 Thread Sharat Masetty
We are not really checking the return value of adreno_gpu_state_get()
at the callers, and in addition the state capture is mostly a
best-effort service, so make the function return void.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 +---
 drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 1ca4bea..40bcf32 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -380,7 +380,7 @@ bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer 
*ring)
return false;
 }
 
-int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
+void adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
 {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int i, count = 0;
@@ -437,8 +437,6 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct 
msm_gpu_state *state)
 
state->nr_registers = count;
}
-
-   return 0;
 }
 
 void adreno_gpu_state_destroy(struct msm_gpu_state *state)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 4973c8c..d4834b3 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -235,7 +235,7 @@ int adreno_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,
 
 void adreno_gpu_state_destroy(struct msm_gpu_state *state);
 
-int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state);
+void adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state);
 int adreno_gpu_state_put(struct msm_gpu_state *state);
 
 /* ringbuffer helpers (the parts that are adreno specific) */
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/2] drm/scheduler: Add drm_sched_suspend/resume_timeout()

2018-11-29 Thread Sharat Masetty
This patch adds two new functions to help client drivers suspend and
resume the scheduler job timeout. This can be useful in cases where the
hardware has preemption support enabled: using these helpers, it is
possible to keep the timeout active only for the ring that is currently
active on the hardware. This patch also makes the job_list_lock IRQ safe.
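
As a hedged usage sketch (the example_ring structure and its 'remaining'
bookkeeping field are hypothetical driver-side names, not part of this
patch), a driver with hardware preemption could park and re-arm the
timeouts from its preemption-complete interrupt handler roughly like this:

#include <drm/gpu_scheduler.h>

/* Hypothetical per-ring driver state, not defined by this patch */
struct example_ring {
        struct drm_gpu_scheduler sched;
        unsigned long remaining;  /* saved timeout; start at the full timeout */
};

/* Both helpers are safe to call from IRQ context */
static void example_preempt_complete(struct example_ring *outgoing,
                                     struct example_ring *incoming)
{
        /* Park the timeout of the ring that just lost the hardware... */
        outgoing->remaining = drm_sched_suspend_timeout(&outgoing->sched);

        /* ...and re-arm the timeout of the ring that now owns it */
        drm_sched_resume_timeout(&incoming->sched, incoming->remaining);
}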

Suggested-by: Christian Koenig 
Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/etnaviv/etnaviv_dump.c |  9 ++--
 drivers/gpu/drm/scheduler/sched_main.c | 91 --
 include/drm/gpu_scheduler.h|  4 ++
 3 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c 
b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 9146e30..fd6bad2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -118,6 +118,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
unsigned int n_obj, n_bomap_pages;
size_t file_size, mmu_size;
__le64 *bomap, *bomap_start;
+   unsigned long flags;
 
/* Only catch the first event, or when manually re-armed */
if (!etnaviv_dump_core)
@@ -134,13 +135,13 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
mmu_size + gpu->buffer.size;
 
/* Add in the active command buffers */
-   spin_lock(>sched.job_list_lock);
+   spin_lock_irqsave(>job_list_lock, flags);
list_for_each_entry(s_job, >sched.ring_mirror_list, node) {
submit = to_etnaviv_submit(s_job);
file_size += submit->cmdbuf.size;
n_obj++;
}
-   spin_unlock(>sched.job_list_lock);
+   spin_unlock_irqrestore(>job_list_lock, flags);
 
/* Add in the active buffer objects */
list_for_each_entry(vram, >mmu->mappings, mmu_node) {
@@ -182,14 +183,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
  gpu->buffer.size,
  etnaviv_cmdbuf_get_va(>buffer));
 
-   spin_lock(>sched.job_list_lock);
+   spin_lock_irqsave(>job_list_lock, flags);
list_for_each_entry(s_job, >sched.ring_mirror_list, node) {
submit = to_etnaviv_submit(s_job);
etnaviv_core_dump_mem(, ETDUMP_BUF_CMD,
  submit->cmdbuf.vaddr, submit->cmdbuf.size,
  etnaviv_cmdbuf_get_va(>cmdbuf));
}
-   spin_unlock(>sched.job_list_lock);
+   spin_unlock_irqrestore(>job_list_lock, flags);
 
/* Reserve space for the bomap */
if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index c993d10..ca09b4e 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -196,10 +196,68 @@ static void drm_sched_start_timeout(struct 
drm_gpu_scheduler *sched)
schedule_delayed_work(>work_tdr, sched->timeout);
 }
 
+/**
+ * drm_sched_suspend_timeout - Suspend scheduler job timeout
+ *
+ * @sched: scheduler instance for which to suspend the timeout
+ *
+ * Suspend the delayed work timeout for the scheduler. This is done by
+ * modifying the delayed work timeout to an arbitrary large value,
+ * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
+ * called from an IRQ context.
+ *
+ * Returns the timeout remaining
+ *
+ */
+unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
+{
+   unsigned long sched_timeout, now = jiffies;
+
+   sched_timeout = sched->work_tdr.timer.expires;
+
+   /*
+* Modify the timeout to an arbitrarily large value. This also prevents
+* the timeout to be restarted when new submissions arrive
+*/
+   if (mod_delayed_work(system_wq, >work_tdr, MAX_SCHEDULE_TIMEOUT)
+   && time_after(sched_timeout, now))
+   return sched_timeout - now;
+   else
+   return sched->timeout;
+}
+EXPORT_SYMBOL(drm_sched_suspend_timeout);
+
+/**
+ * drm_sched_resume_timeout - Resume scheduler job timeout
+ *
+ * @sched: scheduler instance for which to resume the timeout
+ * @remaining: remaining timeout
+ *
+ * Resume the delayed work timeout for the scheduler. Note that
+ * this function can be called from an IRQ context.
+ */
+void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
+   unsigned long remaining)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(>job_list_lock, flags);
+
+   if (list_empty(>ring_mirror_list))
+   cancel_delayed_work(>work_tdr);
+   else
+   mod_delayed_work(system_wq, >work_tdr, remaining);
+
+   spin_unlock_irqrestore(>job_list_lock, flags);
+}
+EXPORT_SYMBOL(drm_sched_resume_timeout);
+
 /* job_finish is called after hw fence signaled
  */
 static void drm_sched_job_

[PATCH 1/2] drm/scheduler: Set sched->thread to NULL on failure

2018-11-29 Thread Sharat Masetty
In cases where the scheduler instance is used as a base object of another
driver object, it is not clear whether the driver can call the scheduler
cleanup on the failure path. So set sched->thread to NULL, so that the
driver can safely call drm_sched_fini() during cleanup.
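
A hedged sketch of the error-handling pattern this enables (the example_gpu
wrapper, the ops table and the argument values are hypothetical, and the
drm_sched_init() parameter list is assumed to match this kernel version):

struct example_gpu {
        struct drm_gpu_scheduler sched;
};

static const struct drm_sched_backend_ops example_sched_ops;

static int example_gpu_init(struct example_gpu *gpu)
{
        int ret;

        ret = drm_sched_init(&gpu->sched, &example_sched_ops,
                             4 /* hw_submission */, 0 /* hang_limit */,
                             msecs_to_jiffies(500), "example-ring");
        if (ret)
                goto err;

        /* ... further init that may also fail ... */
        return 0;

err:
        /*
         * Safe even when drm_sched_init() failed, because sched->thread
         * is now guaranteed to be NULL rather than an ERR_PTR value.
         */
        drm_sched_fini(&gpu->sched);
        return ret;
}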

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/scheduler/sched_main.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 44fe587..c993d10 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -594,7 +594,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
   long timeout,
   const char *name)
 {
-   int i;
+   int i, ret;
sched->ops = ops;
sched->hw_submission_limit = hw_submission;
sched->name = name;
@@ -615,8 +615,10 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
/* Each scheduler will run on a seperate kernel thread */
sched->thread = kthread_run(drm_sched_main, sched, sched->name);
if (IS_ERR(sched->thread)) {
+   ret = PTR_ERR(sched->thread);
+   sched->thread = NULL;
DRM_ERROR("Failed to create scheduler for %s.\n", name);
-   return PTR_ERR(sched->thread);
+   return ret;
}
 
return 0;
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 3/4] drm/msm: Use msm_gpu_state_bo for ringbuffer data

2018-11-20 Thread Sharat Masetty
The ring substructure in msm_gpu_state is an extension of
msm_gpu_state_bo, so this patch changes the ring structure
to reuse msm_gpu_state_bo as its base, instead of
redefining the required fields.
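
For reference, msm_gpu_state_bo itself is not shown in this diff; based on
the fields used here (iova, size, data), it amounts to a sketch like the
following, with the per-ring state embedding it instead of duplicating the
same members:

struct msm_gpu_state_bo {
        u64 iova;
        size_t size;
        void *data;
};

/* embedded (anonymously) per ring inside msm_gpu_state */
struct example_state_ring {
        u32 fence, seqno, rptr, wptr;
        struct msm_gpu_state_bo bo;     /* ringbuffer contents snapshot */
};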

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 20 +++-
 drivers/gpu/drm/msm/msm_gpu.h   |  4 +---
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 6ebe842..bbf8d3e 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -383,7 +383,7 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct 
msm_gpu_state *state)
int size = 0, j;
 
state->ring[i].fence = gpu->rb[i]->memptrs->fence;
-   state->ring[i].iova = gpu->rb[i]->iova;
+   state->ring[i].bo.iova = gpu->rb[i]->iova;
state->ring[i].seqno = gpu->rb[i]->seqno;
state->ring[i].rptr = get_rptr(adreno_gpu, gpu->rb[i]);
state->ring[i].wptr = get_wptr(gpu->rb[i]);
@@ -397,10 +397,12 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct 
msm_gpu_state *state)
size = j + 1;
 
if (size) {
-   state->ring[i].data = kvmalloc(size << 2, GFP_KERNEL);
-   if (state->ring[i].data) {
-   memcpy(state->ring[i].data, gpu->rb[i]->start, 
size << 2);
-   state->ring[i].data_size = size << 2;
+   state->ring[i].bo.data =
+   kvmalloc(size << 2, GFP_KERNEL);
+   if (state->ring[i].bo.data) {
+   memcpy(state->ring[i].bo.data,
+   gpu->rb[i]->start, size << 2);
+   state->ring[i].bo.size = size << 2;
}
}
}
@@ -440,7 +442,7 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state)
int i;
 
for (i = 0; i < ARRAY_SIZE(state->ring); i++)
-   kvfree(state->ring[i].data);
+   kvfree(state->ring[i].bo.data);
 
for (i = 0; state->bos && i < state->nr_bos; i++)
kvfree(state->bos[i].data);
@@ -522,15 +524,15 @@ void adreno_show(struct msm_gpu *gpu, struct 
msm_gpu_state *state,
 
for (i = 0; i < gpu->nr_rings; i++) {
drm_printf(p, "  - id: %d\n", i);
-   drm_printf(p, "iova: 0x%016llx\n", state->ring[i].iova);
+   drm_printf(p, "iova: 0x%016llx\n", state->ring[i].bo.iova);
drm_printf(p, "last-fence: %d\n", state->ring[i].seqno);
drm_printf(p, "retired-fence: %d\n", state->ring[i].fence);
drm_printf(p, "rptr: %d\n", state->ring[i].rptr);
drm_printf(p, "wptr: %d\n", state->ring[i].wptr);
drm_printf(p, "size: %d\n", MSM_GPU_RINGBUFFER_SZ);
 
-   adreno_show_object(p, state->ring[i].data,
-   state->ring[i].data_size);
+   adreno_show_object(p, state->ring[i].bo.data,
+   state->ring[i].bo.size);
}
 
if (state->bos) {
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 7dc775f..a3a6371 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -198,13 +198,11 @@ struct msm_gpu_state {
struct timeval time;
 
struct {
-   u64 iova;
u32 fence;
u32 seqno;
u32 rptr;
u32 wptr;
-   void *data;
-   int data_size;
+   struct msm_gpu_state_bo bo;
} ring[MSM_GPU_MAX_RINGS];
 
int nr_registers;
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 4/4] drm/msm: Optimize adreno_show_object()

2018-11-20 Thread Sharat Masetty
When userspace tries to read the crashstate dump, the read-side
implementation in the driver currently ascii85 encodes all the binary
buffers, and it does this each time the read system call is made.
A userspace tool like cat typically does a page-by-page read, and the
number of read calls depends on the size of the data captured by the
driver. This is certainly not desirable and does not scale well with
large captures.

This patch encodes each buffer only once in the read path. With this
there is an immediate >10x speed improvement in crashstate save time.
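
The worst-case size of the encode-once buffer follows directly from the
ascii85 format: each 32-bit word becomes either five characters or a single
'z'. A small sizing sketch, assuming ascii85_encode_len() rounds the byte
length up to whole dwords as in include/linux/ascii85.h:

static size_t example_ascii85_worst_case(size_t len)
{
        size_t dwords = ascii85_encode_len(len);

        /* 5 bytes per dword in the worst case, plus the trailing '\0' */
        return dwords * 5 + 1;
}

This is the same calculation used for the kvmalloc() in the patch below.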

Signed-off-by: Sharat Masetty 
---
Changes from v1:
Addressed comments from Jordan Crouse

 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 80 -
 drivers/gpu/drm/msm/msm_gpu.h   |  1 +
 2 files changed, 60 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index bbf8d3e..7749967 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -441,11 +441,15 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state)
 {
int i;

-   for (i = 0; i < ARRAY_SIZE(state->ring); i++)
+   for (i = 0; i < ARRAY_SIZE(state->ring); i++) {
kvfree(state->ring[i].bo.data);
+   kvfree(state->ring[i].bo.encoded);
+   }

-   for (i = 0; state->bos && i < state->nr_bos; i++)
+   for (i = 0; state->bos && i < state->nr_bos; i++) {
kvfree(state->bos[i].data);
+   kvfree(state->bos[i].encoded);
+   }

kfree(state->bos);
kfree(state->comm);
@@ -472,34 +476,70 @@ int adreno_gpu_state_put(struct msm_gpu_state *state)

 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)

-static void adreno_show_object(struct drm_printer *p, u32 *ptr, int len)
+static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
 {
-   char out[ASCII85_BUFSZ];
-   long l, datalen, i;
+   void *buf;
+   size_t buf_itr = 0;
+   long i, l;

-   if (!ptr || !len)
-   return;
+   if (!len)
+   return NULL;
+
+   l = ascii85_encode_len(len);

/*
-* Only dump the non-zero part of the buffer - rarely will any data
-* completely fill the entire allocated size of the buffer
+* ascii85 outputs either a 5 byte string or a 1 byte string. So we
+* account for the worst case of 5 bytes per dword plus the 1 for '\0'
 */
-   for (datalen = 0, i = 0; i < len >> 2; i++) {
-   if (ptr[i])
-   datalen = (i << 2) + 1;
+   buf = kvmalloc((l * 5) + 1, GFP_KERNEL);
+   if (!buf)
+   return NULL;
+
+   for (i = 0; i < l; i++) {
+   ascii85_encode(src[i], buf + buf_itr);
+
+   if (src[i] == 0)
+   buf_itr += 1;
+   else
+   buf_itr += 5;
}

-   /* Skip printing the object if it is empty */
-   if (datalen == 0)
+   return buf;
+}
+
+static void adreno_show_object(struct drm_printer *p,
+   struct msm_gpu_state_bo *bo)
+{
+   if ((!bo->data && !bo->encoded) || !bo->size)
return;

-   l = ascii85_encode_len(datalen);
+   if (!bo->encoded) {
+   long datalen, i;
+   u32 *buf = bo->data;
+
+   /*
+* Only dump the non-zero part of the buffer - rarely will
+* any data completely fill the entire allocated size of
+* the buffer.
+*/
+   for (datalen = 0, i = 0; i < (bo->size) >> 2; i++) {
+   if (buf[i])
+   datalen = ((i + 1) << 2);
+   }
+
+   bo->encoded = adreno_gpu_ascii85_encode(buf, datalen);
+
+   kvfree(bo->data);
+   bo->data = NULL;
+
+   if (!bo->encoded)
+   return;
+   }

drm_puts(p, "data: !!ascii85 |\n");
drm_puts(p, " ");

-   for (i = 0; i < l; i++)
-   drm_puts(p, ascii85_encode(ptr[i], out));
+   drm_puts(p, bo->encoded);

drm_puts(p, "\n");
 }
@@ -531,8 +571,7 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state 
*state,
drm_printf(p, "wptr: %d\n", state->ring[i].wptr);
drm_printf(p, "size: %d\n", MSM_GPU_RINGBUFFER_SZ);

-   adreno_show_object(p, state->ring[i].bo.data,
-   state->ring[i].bo.size);
+   adreno_show_object(p, &(state->ring[i].bo));
}

if (state->bos) {
@@ -543,8 +582,7 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gp

[PATCH 1/4] drm/msm: use kvmalloc for ring data in gpu crashstate

2018-11-20 Thread Sharat Masetty
The ringbuffer data to capture at crash time can sometimes end up being
large, and its size can vary from less than a page to the full 32KB.
So use the kvmalloc variant, which fits the bill.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index d88d00d..6ebe842 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -397,7 +397,7 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct 
msm_gpu_state *state)
size = j + 1;
 
if (size) {
-   state->ring[i].data = kmalloc(size << 2, GFP_KERNEL);
+   state->ring[i].data = kvmalloc(size << 2, GFP_KERNEL);
if (state->ring[i].data) {
memcpy(state->ring[i].data, gpu->rb[i]->start, 
size << 2);
state->ring[i].data_size = size << 2;
@@ -440,7 +440,7 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state)
int i;
 
for (i = 0; i < ARRAY_SIZE(state->ring); i++)
-   kfree(state->ring[i].data);
+   kvfree(state->ring[i].data);
 
for (i = 0; state->bos && i < state->nr_bos; i++)
kvfree(state->bos[i].data);
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/4] include/linux/ascii85: Update ascii85_encode()

2018-11-20 Thread Sharat Masetty
The current implementation of ascii85_encode() does not copy the encoded
string "z" to the output buffer when the input is zero. This patch
simply adds the missing piece, which makes it easier to use this
function to encode large buffers.
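
A hedged example of the kind of caller this enables (the helper name and
buffer management are hypothetical; the output buffer is assumed to be
sized for the worst case of 5 bytes per word plus a terminating NUL):

static size_t example_encode_words(const u32 *src, long nwords, char *dst)
{
        size_t pos = 0;
        long i;

        for (i = 0; i < nwords; i++) {
                ascii85_encode(src[i], dst + pos);

                /* a zero word encodes as the single character 'z' */
                pos += (src[i] == 0) ? 1 : 5;
        }

        return pos;     /* bytes written, excluding the terminating '\0' */
}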

Signed-off-by: Sharat Masetty 
---
 include/linux/ascii85.h | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/linux/ascii85.h b/include/linux/ascii85.h
index 4cc4020..00646fc 100644
--- a/include/linux/ascii85.h
+++ b/include/linux/ascii85.h
@@ -23,8 +23,12 @@
 {
int i;
 
-   if (in == 0)
-   return "z";
+   if (in == 0) {
+   out[0] = 'z';
+   out[1] = '\0';
+
+   return out;
+   }
 
out[5] = '\0';
for (i = 5; i--; ) {
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/scheduler: Add drm_sched_suspend/resume timeout functions

2018-11-15 Thread Sharat Masetty



On 11/15/2018 12:33 AM, Koenig, Christian wrote:

Am 14.11.18 um 18:29 schrieb Sharat Masetty:



On 11/8/2018 8:11 PM, Koenig, Christian wrote:

Am 08.11.18 um 14:42 schrieb Sharat Masetty:

Hi Christian,

Can you please review this patch? It is a continuation of the
discussion at [1].
At first I was thinking of using a cancel for suspend instead of a
mod(to an
arbitrarily large value), but I couldn't get it to fit in as I have
an additional
constraint of being able to call these functions from an IRQ context.

These new functions race with other contexts, primarily finish_job(),
timedout_job() and recovery(), but I did go through the possible
races between
these(I think). Please let me know what you think of this? I have
not tested
this yet and if this is something in the right direction, I will put
this
through my testing drill and polish it.

IMO I think I prefer the callback approach as it appears to be
simple, less
error prone for both the scheduler and the drivers.


Well I agree that this is way too complicated and looks racy to me as
well. But this is because you moved away from my initial suggestion.

So here is once more how to do it without any additional locks or races:

/**
 * drm_sched_suspend_timeout - suspend timeout for reset worker
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. Note that
 * this function can be called from an IRQ context. It returns the
 * timeout remaining.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
        unsigned long timeout, current = jiffies;

        timeout = sched->work_tdr.timer.expires;

        /*
         * Set timeout to an arbitrarily large value, this also prevents
         * the timer from being started when new submissions arrive.
         */
        if (mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout * 10) &&
            time_after(timeout, current))
                return timeout - current;
        else
                return sched->timeout;
}

/**
 * drm_sched_resume_timeout - resume timeout for reset worker
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler. Note that
 * this function can be called from an IRQ context.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                              unsigned long remaining)
{
        if (list_empty(&sched->ring_mirror_list))
                cancel_delayed_work(&sched->work_tdr);
        else
                mod_delayed_work(system_wq, &sched->work_tdr, remaining);
}

Hi Christian,

Thank you for the suggestions - I was able to try this out this week.
It works for the most part, but there are a couple of races which need
further considerations.

1) The drm_sched_resume_timeout() can race with both the
drm_sched_job_finish() and also new job submissions. In the driver the
job submission which triggered the preemption can be complete as soon
as the switch happens and it is quite possible that I get the
preemption complete and the job done interrupt at the same time. So
this means that drm_sched_resume_timeout() in IRQ context can race
with drm_sched_job_finish() in worker thread context on another CPU.
Also in parallel new jobs can be submitted, which will also update the
ring mirror list . These races can be addressed however with locking
the job_list_lock inside the drm_sched_resume_timeout().


Yeah I know, but I considered this race harmless. Ok, thinking more
about that I realized that this probably means that we could time out a
job way too early.

How about canceling the timer first and then using mod_delayed_work to
set it to the remaining jiffies if there is a job running?

Do you mean something like this?

void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                              unsigned long remaining)
{
        cancel_delayed_work(&sched->work_tdr);

        if (!list_empty(&sched->ring_mirror_list))
                mod_delayed_work(system_wq, &sched->work_tdr, remaining);
}

I think that the above can still be racy and I'd prefer locking with the 
job_list_lock...


Otherwise as you noted as well the alternative is really to make the
job_list_lock irq safe.



2) This one is a little more tricky - In the driver I start off with
all the timeouts suspended(except the one for the default ring), then
on every preemption interrupt I suspend the outgoing ring and resume
the incoming ring. I can only resume if it was previously suspended.
This is how it is set up. The problem that becomes apparent with this
approach is that for the suspended rings this arbitrarily large
timeout can fire at some point(because of no work) and just before
drm_sched_timedout_job() runs a new job can be inserted into the
mirror list. So in this case we get an incorrect timeout.

What are your thoughts on using cancel_delayed_work() instead of mod
in suspend_timeout. Yes we will lose the benefits of mod, but we
should be able to

Re: [PATCH] drm/scheduler: Add drm_sched_suspend/resume timeout functions

2018-11-14 Thread Sharat Masetty



On 11/8/2018 8:11 PM, Koenig, Christian wrote:

Am 08.11.18 um 14:42 schrieb Sharat Masetty:

Hi Christian,

Can you please review this patch? It is a continuation of the discussion at [1].
At first I was thinking of using a cancel for suspend instead of a mod(to an
arbitrarily large value), but I couldn't get it to fit in as I have an 
additional
constraint of being able to call these functions from an IRQ context.

These new functions race with other contexts, primarily finish_job(),
timedout_job() and recovery(), but I did go through the possible races between
these(I think). Please let me know what you think of this? I have not tested
this yet and if this is something in the right direction, I will put this
through my testing drill and polish it.

IMO I think I prefer the callback approach as it appears to be simple, less
error prone for both the scheduler and the drivers.


Well I agree that this is way too complicated and looks racy to me as
well. But this is because you moved away from my initial suggestion.

So here is once more how to do it without any additional locks or races:

/**
 * drm_sched_suspend_timeout - suspend timeout for reset worker
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. Note that
 * this function can be called from an IRQ context. It returns the
 * timeout remaining.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
        unsigned long timeout, current = jiffies;

        timeout = sched->work_tdr.timer.expires;

        /*
         * Set timeout to an arbitrarily large value, this also prevents
         * the timer from being started when new submissions arrive.
         */
        if (mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout * 10) &&
            time_after(timeout, current))
                return timeout - current;
        else
                return sched->timeout;
}

/**
 * drm_sched_resume_timeout - resume timeout for reset worker
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler. Note that
 * this function can be called from an IRQ context.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
                              unsigned long remaining)
{
        if (list_empty(&sched->ring_mirror_list))
                cancel_delayed_work(&sched->work_tdr);
        else
                mod_delayed_work(system_wq, &sched->work_tdr, remaining);
}

Hi Christian,

Thank you for the suggestions - I was able to try this out this week. It 
works for the most part, but there are a couple of races which need 
further considerations.


1) The drm_sched_resume_timeout() can race with both the 
drm_sched_job_finish() and also new job submissions. In the driver the 
job submission which triggered the preemption can be complete as soon as 
the switch happens and it is quite possible that I get the preemption 
complete and the job done interrupt at the same time. So this means that 
drm_sched_resume_timeout() in IRQ context can race with 
drm_sched_job_finish() in worker thread context on another CPU. Also in 
parallel new jobs can be submitted, which will also update the ring 
mirror list . These races can be addressed however with locking the 
job_list_lock inside the drm_sched_resume_timeout().


2) This one is a little more tricky - In the driver I start off with all 
the timeouts suspended(except the one for the default ring), then on 
every preemption interrupt I suspend the outgoing ring and resume the 
incoming ring. I can only resume if it was previously suspended. This is 
how it is set up. The problem that becomes apparent with this approach 
is that for the suspended rings this arbitrarily large timeout can fire 
at some point(because of no work) and just before 
drm_sched_timedout_job() runs a new job can be inserted into the mirror 
list. So in this case we get an incorrect timeout.


What are your thoughts on using cancel_delayed_work() instead of mod in 
suspend_timeout. Yes we will lose the benefits of mod, but we should be 
able to synchronize drm_sched_suspend_timeout() and 
drm_sched_start_timeout() with some basic state. I have not thought this 
completely through so I may be missing something here.


Please share your thoughts on this

Thank you

Sharat



Regards,
Christian.



[1]  https://patchwork.freedesktop.org/patch/259914/

Signed-off-by: Sharat Masetty 
---
   drivers/gpu/drm/scheduler/sched_main.c | 81 
+-
   include/drm/gpu_scheduler.h|  5 +++
   2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index c993d10..9645789 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -191,11 +191,84 @@ bool drm_sched_dependency_optim

[PATCH] drm/scheduler: Add drm_sched_suspend/resume timeout functions

2018-11-08 Thread Sharat Masetty
Hi Christian,

Can you please review this patch? It is a continuation of the discussion at [1].
At first I was thinking of using a cancel for suspend instead of a mod(to an
arbitrarily large value), but I couldn't get it to fit in as I have an 
additional
constraint of being able to call these functions from an IRQ context.

These new functions race with other contexts, primarily finish_job(),
timedout_job() and recovery(), but I did go through the possible races between
these(I think). Please let me know what you think of this? I have not tested
this yet and if this is something in the right direction, I will put this
through my testing drill and polish it.

IMO I think I prefer the callback approach as it appears to be simple, less
error prone for both the scheduler and the drivers.

[1]  https://patchwork.freedesktop.org/patch/259914/

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/scheduler/sched_main.c | 81 +-
 include/drm/gpu_scheduler.h|  5 +++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index c993d10..9645789 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -191,11 +191,84 @@ bool drm_sched_dependency_optimized(struct dma_fence* 
fence,
  */
 static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
 {
+   unsigned long flags;
+
+   spin_lock_irqsave(>tdr_suspend_lock, flags);
+
+   sched->timeout_remaining = sched->timeout;
+
if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-   !list_empty(>ring_mirror_list))
+   !list_empty(>ring_mirror_list) && !sched->work_tdr_suspended)
schedule_delayed_work(>work_tdr, sched->timeout);
+
+   spin_unlock_irqrestore(>tdr_suspend_lock, flags);
 }

+/**
+ * drm_sched_suspend_timeout - suspend timeout for reset worker
+ *
+ * @sched: scheduler instance for which to suspend the timeout
+ *
+ * Suspend the delayed work timeout for the scheduler. Note that
+ * this function can be called from an IRQ context.
+ */
+void drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
+{
+   unsigned long flags, timeout;
+
+   spin_lock_irqsave(>tdr_suspend_lock, flags);
+
+   if (sched->work_tdr_suspended ||
+   sched->timeout == MAX_SCHEDULE_TIMEOUT ||
+   list_empty(>ring_mirror_list))
+   goto done;
+
+   timeout = sched->work_tdr.timer.expires;
+
+   /*
+* Reset timeout to an arbitrarily large value
+*/
+   mod_delayed_work(system_wq, >work_tdr, sched->timeout * 10);
+
+   timeout -= jiffies;
+
+   /* FIXME: Can jiffies be after timeout? */
+   sched->timeout_remaining = time_after(jiffies, timeout)? 0: timeout;
+   sched->work_tdr_suspended = true;
+
+done:
+   spin_unlock_irqrestore(>tdr_suspend_lock, flags);
+}
+EXPORT_SYMBOL(drm_sched_suspend_timeout);
+
+/**
+ * drm_sched_resume_timeout - resume timeout for reset worker
+ *
+ * @sched: scheduler instance for which to resume the timeout
+ *
+ * Resume the delayed work timeout for the scheduler. Note that
+ * this function can be called from an IRQ context.
+ */
+void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched)
+{
+   unsigned long flags;
+
+   spin_lock_irqsave(>tdr_suspend_lock, flags);
+
+   if (!sched->work_tdr_suspended ||
+   sched->timeout == MAX_SCHEDULE_TIMEOUT) {
+   spin_unlock_irqrestore(>tdr_suspend_lock, flags);
+   return;
+   }
+
+   mod_delayed_work(system_wq, >work_tdr, sched->timeout_remaining);
+
+   sched->work_tdr_suspended = false;
+
+   spin_unlock_irqrestore(>tdr_suspend_lock, flags);
+}
+EXPORT_SYMBOL(drm_sched_resume_timeout);
+
 /* job_finish is called after hw fence signaled
  */
 static void drm_sched_job_finish(struct work_struct *work)
@@ -348,6 +421,11 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler 
*sched)
struct drm_sched_job *s_job, *tmp;
bool found_guilty = false;
int r;
+   unsigned long flags;
+
+   spin_lock_irqsave(>tdr_suspend_lock, flags);
+   sched->work_tdr_suspended = false;
+   spin_unlock_irqrestore(>tdr_suspend_lock, flags);

spin_lock(>job_list_lock);
list_for_each_entry_safe(s_job, tmp, >ring_mirror_list, node) {
@@ -607,6 +685,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
init_waitqueue_head(>job_scheduled);
INIT_LIST_HEAD(>ring_mirror_list);
spin_lock_init(>job_list_lock);
+   spin_lock_init(>tdr_suspend_lock);
atomic_set(>hw_rq_count, 0);
INIT_DELAYED_WORK(>work_tdr, drm_sched_job_timedout);
atomic_set(>num_jobs, 0);
diff --git a/include/drm/gpu_sch

[PATCH 1/2] drm/msm/a6xx: Build a6xx_gpu_state under the right conditionals

2018-11-05 Thread Sharat Masetty
Build a6xx_gpu_state.c only if either of CONFIG_DEBUG_FS, CONFIG_DEV_COREDUMP
is defined.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/Makefile  | 5 -
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index bfb0f08..404e3ae 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -14,7 +14,6 @@ msm-y := \
adreno/a6xx_gpu.o \
adreno/a6xx_gmu.o \
adreno/a6xx_hfi.o \
-   adreno/a6xx_gpu_state.o \
hdmi/hdmi.o \
hdmi/hdmi_audio.o \
hdmi/hdmi_bridge.o \
@@ -97,6 +96,10 @@ msm-y := \
 msm-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o \
  disp/dpu1/dpu_dbg.o
 
+ifneq (,$(filter y,$(CONFIG_DEBUG_FS) $(CONFIG_DEV_COREDUMP)))
+msm-y += adreno/a6xx_gpu_state.o
+endif
+
 msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o
 msm-$(CONFIG_COMMON_CLK) += disp/mdp4/mdp4_lvds_pll.o
 msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_pll_8960.o
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 2c52b7c..9d1b8c9 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -754,12 +754,12 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu, 
u64 *prev_cycles)
.destroy = a6xx_destroy,
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
.show = a6xx_show,
+   .gpu_state_get = a6xx_gpu_state_get,
+   .gpu_state_put = a6xx_gpu_state_put,
 #endif
.gpu_busy = a6xx_gpu_busy,
.gpu_get_freq = a6xx_gmu_get_freq,
.gpu_set_freq = a6xx_gmu_set_freq,
-   .gpu_state_get = a6xx_gpu_state_get,
-   .gpu_state_put = a6xx_gpu_state_put,
},
.get_timestamp = a6xx_get_timestamp,
 };
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/2] drm/msm/a6xx: Fix a typo in a6xx gpu crash state

2018-11-05 Thread Sharat Masetty
This patch simply fixes a typo for the name of an indexed register.
CP_MEMPOOOL -> CP_MEMPOOL.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h 
b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
index 68cccfa..bbbec8d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
@@ -370,7 +370,7 @@ struct a6xx_registers {
 };
 
 static const struct a6xx_indexed_registers a6xx_cp_mempool_indexed = {
-   "CP_MEMPOOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR,
+   "CP_MEMPOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR,
REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2060,
 };
 
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/3] include/linux/ascii85: Add ascii85_encode_to_buf()

2018-11-05 Thread Sharat Masetty
Add a new function which, in addition to ascii85 encoding into the buffer,
also returns the length of the encoded string. The returned length enables
iteration over the output buffer space. This helps with efficient encoding
of larger buffers, since we avoid an additional memcpy/scnprintf.

Signed-off-by: Sharat Masetty 
---
 include/linux/ascii85.h | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/linux/ascii85.h b/include/linux/ascii85.h
index 4cc4020..3665899 100644
--- a/include/linux/ascii85.h
+++ b/include/linux/ascii85.h
@@ -23,8 +23,12 @@
 {
int i;
 
-   if (in == 0)
-   return "z";
+   if (in == 0) {
+   out[0] = 'z';
+   out[1] = '\0';
+
+   return out;
+   }
 
out[5] = '\0';
for (i = 5; i--; ) {
@@ -35,4 +39,15 @@
return out;
 }
 
+static inline size_t
+ascii85_encode_to_buf(u32 in, char *out)
+{
+   ascii85_encode(in, out);
+
+   if (in == 0)
+   return 1;
+
+   return 5;
+}
+
 #endif
-- 
1.9.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 1/3] drm/msm: use kvmalloc for ring data in gpu crashstate

2018-11-05 Thread Sharat Masetty
The ringbuffer data to capture at crash time can sometimes end up being
large, and its size can vary from less than a page to the full 32KB.
So use the kvmalloc variant, which fits the bill.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 141062f..c93702d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -406,7 +406,7 @@ int adreno_gpu_state_get(struct msm_gpu *gpu, struct 
msm_gpu_state *state)
size = j + 1;
 
if (size) {
-   state->ring[i].data = kmalloc(size << 2, GFP_KERNEL);
+   state->ring[i].data = kvmalloc(size << 2, GFP_KERNEL);
if (state->ring[i].data) {
memcpy(state->ring[i].data, gpu->rb[i]->start, 
size << 2);
state->ring[i].data_size = size << 2;
@@ -445,7 +445,7 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state)
int i;
 
for (i = 0; i < ARRAY_SIZE(state->ring); i++)
-   kfree(state->ring[i].data);
+   kvfree(state->ring[i].data);
 
for (i = 0; state->bos && i < state->nr_bos; i++)
kvfree(state->bos[i].data);
-- 
1.9.1



[PATCH 3/3] drm/msm: Optimize adreno_show_object()

2018-11-05 Thread Sharat Masetty
When userspace reads the crash state dump, the read-side implementation in
the driver currently ascii85 encodes all of the binary buffers, and it does
so on every read() system call. A userspace tool like cat typically reads a
page at a time, so the number of read calls grows with the amount of data
captured by the driver. This does not scale well with large captures.

This patch encodes each buffer only once in the read path and reuses the
encoded copy on subsequent reads. With this there is an immediate >10X
speed improvement in crashstate save time.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 76 -
 drivers/gpu/drm/msm/msm_gpu.h   |  2 +
 2 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index c93702d..e29093e 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -475,34 +475,70 @@ int adreno_gpu_state_put(struct msm_gpu_state *state)
 
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
 
-static void adreno_show_object(struct drm_printer *p, u32 *ptr, int len)
+static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
 {
-   char out[ASCII85_BUFSZ];
-   long l, datalen, i;
+   void *buf;
+   size_t buf_itr = 0;
+   long i, l;
 
-   if (!ptr || !len)
-   return;
+   if (!len)
+   return NULL;
+
+   l = ascii85_encode_len(len);
 
/*
-* Only dump the non-zero part of the buffer - rarely will any data
-* completely fill the entire allocated size of the buffer
+* ascii85 outputs either a 5 byte string or a 1 byte string. So we
+* account for the worst case of 5 bytes per dword plus the 1 for '\0'
 */
-   for (datalen = 0, i = 0; i < len >> 2; i++) {
-   if (ptr[i])
-   datalen = (i << 2) + 1;
-   }
+   buf = kvmalloc((l * 5) + 1, GFP_KERNEL);
+   if (!buf)
+   return NULL;
 
-   /* Skip printing the object if it is empty */
-   if (datalen == 0)
+   for (i = 0; i < l; i++)
+   buf_itr += ascii85_encode_to_buf(src[i], buf + buf_itr);
+
+   return buf;
+}
+
+/* len is expected to be in bytes */
+static void adreno_show_object(struct drm_printer *p, void **ptr, int len,
+   bool *encoded)
+{
+   if (!*ptr || !len)
return;
 
-   l = ascii85_encode_len(datalen);
+   if (!*encoded) {
+   long datalen, i;
+   u32 *buf = *ptr;
+
+   /*
+* Only dump the non-zero part of the buffer - rarely will
+* any data completely fill the entire allocated size of
+* the buffer.
+*/
+   for (datalen = 0, i = 0; i < len >> 2; i++) {
+   if (buf[i])
+   datalen = ((i + 1) << 2);
+   }
+
+   /*
+* If we reach here, then the originally captured binary buffer
+* will be replaced with the ascii85 encoded string
+*/
+   *ptr = adreno_gpu_ascii85_encode(buf, datalen);
+
+   kvfree(buf);
+
+   *encoded = true;
+   }
+
+   if (!*ptr)
+   return;
 
drm_puts(p, "data: !!ascii85 |\n");
drm_puts(p, " ");
 
-   for (i = 0; i < l; i++)
-   drm_puts(p, ascii85_encode(ptr[i], out));
+   drm_puts(p, *ptr);
 
drm_puts(p, "\n");
 }
@@ -534,8 +570,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
drm_printf(p, "wptr: %d\n", state->ring[i].wptr);
drm_printf(p, "size: %d\n", MSM_GPU_RINGBUFFER_SZ);
 
-   adreno_show_object(p, state->ring[i].data,
-   state->ring[i].data_size);
+   adreno_show_object(p, &state->ring[i].data,
+   state->ring[i].data_size, &state->ring[i].encoded);
}
 
if (state->bos) {
@@ -546,8 +582,8 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
state->bos[i].iova);
drm_printf(p, "size: %zd\n", state->bos[i].size);
 
-   adreno_show_object(p, state->bos[i].data,
-   state->bos[i].size);
+   adreno_show_object(p, &state->bos[i].data,
+   state->bos[i].size, &state->bos[i].encoded);
}
}
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index f82bac0..efb49bb 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -187,6 +187,7 @@ struc
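The msm_gpu.h hunk is cut off in this archive. Judging from the new
adreno_show_object() arguments above (&state->ring[i].encoded and
&state->bos[i].encoded), it presumably just adds a flag to the captured
ring and BO state, roughly along these lines (a reconstruction based on
that usage, not the literal hunk):

struct msm_gpu_state_bo {
	u64 iova;
	size_t size;
	void *data;
	bool encoded;	/* true once "data" holds the ascii85 string */
};

The per-ring capture in struct msm_gpu_state would gain the same
bool encoded flag.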

Re: [Freedreno] [PATCH 2/2] drm/scheduler: Add a start_timeout_notify() backend function

2018-11-04 Thread Sharat Masetty



On 11/2/2018 7:07 PM, Koenig, Christian wrote:

On 02.11.18 at 14:25, Sharat Masetty wrote:



On 11/2/2018 4:09 PM, Koenig, Christian wrote:

On 02.11.18 at 11:31, Sharat Masetty wrote:

Add an optional backend function op which will let the scheduler clients
know when the timeout got scheduled on the scheduler instance. This will
help drivers with multiple schedulers (one per ring) measure time spent on
the ring accurately, eventually helping with better timeout detection.

Signed-off-by: Sharat Masetty 


Well, NAK. drm_sched_start_timeout() is called whenever the timer needs
to run, but that doesn't mean that the timer is started (e.g. it can
already be running).

So the callback would be called multiple times and not reflect the
actual job run time.

In addition to that, it can be racy, e.g. we can complete multiple jobs at
a time before the timer is started again.

If you want to accurately count how much time you spend on each job/ring
you need to do this by measuring the time inside your driver instead.

E.g. for amdgpu I would get the time first in amdgpu_job_run() and then
again in amdgpu_job_free_cb() and calculate the difference.

Hi Christian,

Thank you for the comments, and apologies if this was confusing. All I
want to determine (more accurately) is whether, when the scheduler
instance timer of say 500 ms goes off, the ring associated with that
scheduler instance actually spent 500 ms on the hardware - and for
this I need to know in the driver when the timer actually started.

In msm hardware we have ring preemption support enabled and the kernel
driver triggers a preemption switch to a higher priority ring if there
is work available on that ring for the GPU to work on. So in the
presence of preemption it is possible that a lower priority ring did
not actually get to spend the full 500 ms and this is what I am trying
to catch with this callback.

I am *not* trying to profile per job time consumption with this.


Well, NAK. drm_sched_start_timeout() is called whenever the timer needs
to run, but that doesn't mean that the timer is started (e.g. it can
already be running).


Regarding the case where the timer may already be running - good
point, but it should be easy to address that scenario. I will check the
return value of schedule_delayed_work() and only call the newly proposed
callback if the timer was really scheduled.


Yeah, that should work.



In summary, when this timedout_job() callback is called, it is assumed
that the job actually did time out from the POV of the scheduler, but
this will not hold true with preemption switching and that is what I
am trying to better address with this patch.


Mhm, so what you actually need is to suspend the timeout when the lower
priority ring is preempted and resume it when it is started again? I
wonder if that wouldn't be simpler.

We have support for ring preemption as well, but not implemented yet. So
it would be nice to have something that works for everybody.

But on the other hand a callback to notify the driver that the timer
started isn't so bad either.

Hi Christian,

Yes, something like a suspend/resume for the timeout would be simpler for
the drivers, but I could not find anything which does this for delayed
work or even for regular timers; all I could find was cancel/delete.


Given that, I chose this approach. If you are fine with the proposed
patch as it is, I will address the review comments and re-spin - please
let me know.


Sharat


Regards,
Christian.



Sharat


Regards,
Christian.


---
Here is an example of how I plan to use this new function callback.

[1] https://patchwork.freedesktop.org/patch/254227/

    drivers/gpu/drm/scheduler/sched_main.c | 7 ++-
    include/drm/gpu_scheduler.h    | 6 ++
    2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index c993d10..afd461e 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -192,8 +192,13 @@ bool drm_sched_dependency_optimized(struct dma_fence* fence,
    static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
    {
    if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-	   !list_empty(&sched->ring_mirror_list))
+	   !list_empty(&sched->ring_mirror_list)) {
+
	schedule_delayed_work(&sched->work_tdr, sched->timeout);
+
+	if (sched->ops->start_timeout_notify)
+		sched->ops->start_timeout_notify(sched);
+	}
    }
    }

    /* job_finish is called after hw fence signaled
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d87b268..faf28b4 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -239,6 +239,12 @@ struct drm_sched_backend_ops {
     * and it's time to clean it up.
     */
    void (*free_job)(struct drm_sched_job *sched_job);
+
+	/*
+	 * (Optional) Called to let the driver know that a timeout detection
+	 * timer has been started.
+	 */
+	void (*start_timeout_notify)(struct drm_gpu_scheduler *sched);

Re: [Freedreno] [PATCH 2/2] drm/scheduler: Add a start_timeout_notify() backend function

2018-11-02 Thread Sharat Masetty



On 11/2/2018 4:09 PM, Koenig, Christian wrote:

On 02.11.18 at 11:31, Sharat Masetty wrote:

Add an optional backend function op which will let the scheduler clients
know when the timeout got scheduled on the scheduler instance. This will
help drivers with multiple schedulers (one per ring) measure time spent on
the ring accurately, eventually helping with better timeout detection.

Signed-off-by: Sharat Masetty 


Well, NAK. drm_sched_start_timeout() is called whenever the timer needs
to run, but that doesn't mean that the timer is started (e.g. it can
already be running).

So the callback would be called multiple times and not reflect the
actual job run time.

In addition to that, it can be racy, e.g. we can complete multiple jobs at
a time before the timer is started again.

If you want to accurately count how much time you spend on each job/ring
you need to do this by measuring the time inside your driver instead.

E.g. for amdgpu I would get the time first in amdgpu_job_run() and then
again in amdgpu_job_free_cb() and calculate the difference.

Hi Christian,

Thank you for the comments, and apologies if this was confusing. All I
want to determine (more accurately) is whether, when the scheduler instance
timer of say 500 ms goes off, the ring associated with that scheduler
instance actually spent 500 ms on the hardware - and for this I need to
know in the driver when the timer actually started.


In msm hardware we have ring preemption support enabled and the kernel 
driver triggers a preemption switch to a higher priority ring if there 
is work available on that ring for the GPU to work on. So in the 
presence of preemption it is possible that a lower priority ring did not 
actually get to spend the full 500 ms and this is what I am trying to 
catch with this callback.


I am *not* trying to profile per job time consumption with this.

> Well, NAK. drm_sched_start_timeout() is called whenever the timer needs
> to run, but that doesn't mean that the timer is started (e.g. it can
> already be running).

Regarding the case where the timer may already be running - good point,
but it should be easy to address that scenario. I will check the return
value of schedule_delayed_work() and only call the newly proposed callback
if the timer was really scheduled.


In summary, when this timedout_job() callback is called, it is assumed 
that the job actually did time out from the POV of the scheduler, but 
this will not hold true with preemption switching and that is what I am 
trying to better address with this patch.


Sharat


Regards,
Christian.


---
Here is an example of how I plan to use this new function callback.

[1] https://patchwork.freedesktop.org/patch/254227/

   drivers/gpu/drm/scheduler/sched_main.c | 7 ++-
   include/drm/gpu_scheduler.h| 6 ++
   2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index c993d10..afd461e 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -192,8 +192,13 @@ bool drm_sched_dependency_optimized(struct dma_fence* fence,
   static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
   {
if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-   !list_empty(&sched->ring_mirror_list))
+   !list_empty(&sched->ring_mirror_list)) {
+
schedule_delayed_work(&sched->work_tdr, sched->timeout);
+
+   if (sched->ops->start_timeout_notify)
+   sched->ops->start_timeout_notify(sched);
+   }
   }

   /* job_finish is called after hw fence signaled
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d87b268..faf28b4 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -239,6 +239,12 @@ struct drm_sched_backend_ops {
* and it's time to clean it up.
 */
void (*free_job)(struct drm_sched_job *sched_job);
+
+   /*
+* (Optional) Called to let the driver know that a timeout detection
+* timer has been started.
+*/
+   void (*start_timeout_notify)(struct drm_gpu_scheduler *sched);
   };

   /**
--
1.9.1
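For reference, the adjustment Sharat proposes above - only calling the new
callback when the delayed work was actually queued - could look roughly
like this (a sketch on top of the posted patch, not a tested change;
schedule_delayed_work() returns false when the work was already pending):

static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->ring_mirror_list)) {
		/*
		 * Only notify the driver when a new timeout period really
		 * starts; if the work was already pending, the timer is
		 * still running and no notification is sent.
		 */
		if (schedule_delayed_work(&sched->work_tdr, sched->timeout) &&
		    sched->ops->start_timeout_notify)
			sched->ops->start_timeout_notify(sched);
	}
}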






--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
Linux Foundation Collaborative Project


[PATCH 1/2] drm/scheduler: Set sched->thread to NULL on failure

2018-11-02 Thread Sharat Masetty
In cases where the scheduler instance is embedded in another driver object,
it is not clear whether the driver may call the scheduler cleanup on its
failure path. So set sched->thread to NULL on failure, so that the driver
can safely call drm_sched_fini() during cleanup.

Signed-off-by: Sharat Masetty 
---
 drivers/gpu/drm/scheduler/sched_main.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 44fe587..c993d10 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -594,7 +594,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
   long timeout,
   const char *name)
 {
-   int i;
+   int i, ret;
sched->ops = ops;
sched->hw_submission_limit = hw_submission;
sched->name = name;
@@ -615,8 +615,10 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
/* Each scheduler will run on a seperate kernel thread */
sched->thread = kthread_run(drm_sched_main, sched, sched->name);
if (IS_ERR(sched->thread)) {
+   ret = PTR_ERR(sched->thread);
+   sched->thread = NULL;
DRM_ERROR("Failed to create scheduler for %s.\n", name);
-   return PTR_ERR(sched->thread);
+   return ret;
}

return 0;
--
1.9.1
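To illustrate why this helps (hypothetical driver code, not from this patch;
struct my_ring and my_sched_ops are made-up names), a driver that embeds the
scheduler can now use a single teardown path whether or not drm_sched_init()
succeeded, because drm_sched_fini() only stops the kthread when sched->thread
is non-NULL:

#include <drm/gpu_scheduler.h>
#include <linux/jiffies.h>

/* Hypothetical driver wrapper around the scheduler. */
struct my_ring {
	struct drm_gpu_scheduler sched;
};

/* my_sched_ops: the driver's drm_sched_backend_ops (callbacks not shown) */
static const struct drm_sched_backend_ops my_sched_ops;

static int my_ring_init(struct my_ring *ring)
{
	/* 16 in-flight jobs, no hang limit, 500 ms timeout */
	return drm_sched_init(&ring->sched, &my_sched_ops, 16, 0,
			      msecs_to_jiffies(500), "my-ring");
}

static void my_ring_destroy(struct my_ring *ring)
{
	/*
	 * Safe even if drm_sched_init() failed: sched->thread was set to
	 * NULL on the error path, so drm_sched_fini() will not call
	 * kthread_stop() on an error pointer.
	 */
	drm_sched_fini(&ring->sched);
}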


