[PATCH 3/5] gpu: host1x: Complete stream ID entry tables

2024-04-24 Thread Mikko Perttunen
From: Mikko Perttunen 

These tables contain fixed values to program the host1x hardware
with, so fill in the missing entries.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 150 +++
 1 file changed, 72 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 3a0aaa68ac8d..f83aaa40b13e 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -142,18 +142,29 @@ static const struct host1x_info host1x05_info = {
 };
 
 static const struct host1x_sid_entry tegra186_sid_table[] = {
-   {
-   /* VIC */
-   .base = 0x1af0,
-   .offset = 0x30,
-   .limit = 0x34
-   },
-   {
-   /* NVDEC */
-   .base = 0x1b00,
-   .offset = 0x30,
-   .limit = 0x34
-   },
+   { /* SE1  */  .base = 0x1ac8, .offset = 0x90,.limit = 0x90},
+   { /* SE2  */  .base = 0x1ad0, .offset = 0x90,.limit = 0x90},
+   { /* SE3  */  .base = 0x1ad8, .offset = 0x90,.limit = 0x90},
+   { /* SE4  */  .base = 0x1ae0, .offset = 0x90,.limit = 0x90},
+   { /* ISP  */  .base = 0x1ae8, .offset = 0x50,.limit = 0x50},
+   { /* VIC  */  .base = 0x1af0, .offset = 0x30,.limit = 0x34},
+   { /* NVENC*/  .base = 0x1af8, .offset = 0x30,.limit = 0x34},
+   { /* NVDEC*/  .base = 0x1b00, .offset = 0x30,.limit = 0x34},
+   { /* NVJPG*/  .base = 0x1b08, .offset = 0x30,.limit = 0x34},
+   { /* TSEC */  .base = 0x1b10, .offset = 0x30,.limit = 0x34},
+   { /* TSECB*/  .base = 0x1b18, .offset = 0x30,.limit = 0x34},
+   { /* VI 0 */  .base = 0x1b80, .offset = 0x1, .limit = 0x1 },
+   { /* VI 1 */  .base = 0x1b88, .offset = 0x2, .limit = 0x2 },
+   { /* VI 2 */  .base = 0x1b90, .offset = 0x3, .limit = 0x3 },
+   { /* VI 3 */  .base = 0x1b98, .offset = 0x4, .limit = 0x4 },
+   { /* VI 4 */  .base = 0x1ba0, .offset = 0x5, .limit = 0x5 },
+   { /* VI 5 */  .base = 0x1ba8, .offset = 0x6, .limit = 0x6 },
+   { /* VI 6 */  .base = 0x1bb0, .offset = 0x7, .limit = 0x7 },
+   { /* VI 7 */  .base = 0x1bb8, .offset = 0x8, .limit = 0x8 },
+   { /* VI 8 */  .base = 0x1bc0, .offset = 0x9, .limit = 0x9 },
+   { /* VI 9 */  .base = 0x1bc8, .offset = 0xa, .limit = 0xa },
+   { /* VI 10*/  .base = 0x1bd0, .offset = 0xb, .limit = 0xb },
+   { /* VI 11*/  .base = 0x1bd8, .offset = 0xc, .limit = 0xc },
 };
 
 static const struct host1x_info host1x06_info = {
@@ -173,24 +184,26 @@ static const struct host1x_info host1x06_info = {
 };
 
 static const struct host1x_sid_entry tegra194_sid_table[] = {
-   {
-   /* VIC */
-   .base = 0x1af0,
-   .offset = 0x30,
-   .limit = 0x34
-   },
-   {
-   /* NVDEC */
-   .base = 0x1b00,
-   .offset = 0x30,
-   .limit = 0x34
-   },
-   {
-   /* NVDEC1 */
-   .base = 0x1bc0,
-   .offset = 0x30,
-   .limit = 0x34
-   },
+   { /* SE1  */  .base = 0x1ac8, .offset = 0x90,  .limit = 0x90  },
+   { /* SE2  */  .base = 0x1ad0, .offset = 0x90,  .limit = 0x90  },
+   { /* SE3  */  .base = 0x1ad8, .offset = 0x90,  .limit = 0x90  },
+   { /* SE4  */  .base = 0x1ae0, .offset = 0x90,  .limit = 0x90  },
+   { /* ISP  */  .base = 0x1ae8, .offset = 0x800, .limit = 0x800 },
+   { /* VIC  */  .base = 0x1af0, .offset = 0x30,  .limit = 0x34  },
+   { /* NVENC*/  .base = 0x1af8, .offset = 0x30,  .limit = 0x34  },
+   { /* NVDEC*/  .base = 0x1b00, .offset = 0x30,  .limit = 0x34  },
+   { /* NVJPG*/  .base = 0x1b08, .offset = 0x30,  .limit = 0x34  },
+   { /* TSEC */  .base = 0x1b10, .offset = 0x30,  .limit = 0x34  },
+   { /* TSECB*/  .base = 0x1b18, .offset = 0x30,  .limit = 0x34  },
+   { /* VI   */  .base = 0x1b80, .offset = 0x800, .limit = 0x800 },
+   { /* VI_THI   */  .base = 0x1b88, .offset = 0x30,  .limit = 0x34  },
+   { /* ISP_THI  */  .base = 0x1b90, .offset = 0x30,  .limit = 0x34  },
+   { /* PVA0_CLUSTER */  .base = 0x1b98, .offset = 0x0,   .limit = 0x0   },
+   { /* PVA0_CLUSTER */  .base = 0x1ba0, .offset = 0x0,   .limit = 0x0   },
+   { /* NVDLA0   */  .base = 0x1ba8, .offset = 0x30,  .limit = 0x34  },
+   { /* NVDLA1   */  .base = 0x1bb0, .offset = 0x30,  .limit = 0x34  },
+   { /* NVENC1   */  .base = 0x1bb8, .offset = 0x30,  .limit = 0x34  },
+   { /* NVDEC1   */  .base = 0x1bc0, .offset = 0x30,  .limit = 0x34

[PATCH 5/5] gpu: host1x: fence: Disable timeout on pre-silicon

2024-04-24 Thread Mikko Perttunen
From: Mikko Perttunen 

Timing can be unreliable on pre-silicon platforms, so disable fence
timeouts when not running on silicon.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/fence.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index 139ad1afd935..711eeb93689e 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -11,6 +11,9 @@
 #include 
 #include 
 
+#include 
+
+#include "dev.h"
 #include "fence.h"
 #include "intr.h"
 #include "syncpt.h"
@@ -127,6 +130,12 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt 
*sp, u32 threshold,
 {
struct host1x_syncpt_fence *fence;
 
+   if (!tegra_is_silicon()) {
+   dev_info_once(sp->host->dev,
+   "fence timeout disabled due to pre-silicon platform\n");
+   timeout = false;
+   }
+
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
if (!fence)
return ERR_PTR(-ENOMEM);
-- 
2.42.0



[PATCH 4/5] gpu: host1x: Add MLOCK recovery for rest of engines

2024-04-24 Thread Mikko Perttunen
From: Mikko Perttunen 

Add class IDs / MLOCKs for MLOCK recovery for the rest of the engines
present on Tegra234.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/cdma_hw.c | 12 
 include/linux/host1x.h  |  5 +
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 1b65a10b9dfc..3f3f0018eee0 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -254,12 +254,24 @@ static void timeout_release_mlock(struct host1x_cdma 
*cdma)
u32 offset;
 
switch (ch->client->class) {
+   case HOST1X_CLASS_NVJPG1:
+   offset = HOST1X_COMMON_NVJPG1_MLOCK;
+   break;
+   case HOST1X_CLASS_NVENC:
+   offset = HOST1X_COMMON_NVENC_MLOCK;
+   break;
case HOST1X_CLASS_VIC:
offset = HOST1X_COMMON_VIC_MLOCK;
break;
+   case HOST1X_CLASS_NVJPG:
+   offset = HOST1X_COMMON_NVJPG_MLOCK;
+   break;
case HOST1X_CLASS_NVDEC:
offset = HOST1X_COMMON_NVDEC_MLOCK;
break;
+   case HOST1X_CLASS_OFA:
+   offset = HOST1X_COMMON_OFA_MLOCK;
+   break;
default:
WARN(1, "%s was not updated for class %u", __func__, 
ch->client->class);
return;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 9c8119ed13a4..5a7a81e5f9bd 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -14,12 +14,17 @@
 
 enum host1x_class {
HOST1X_CLASS_HOST1X = 0x1,
+   HOST1X_CLASS_NVJPG1 = 0x7,
+   HOST1X_CLASS_NVENC = 0x21,
+   HOST1X_CLASS_NVENC1 = 0x22,
HOST1X_CLASS_GR2D = 0x51,
HOST1X_CLASS_GR2D_SB = 0x52,
HOST1X_CLASS_VIC = 0x5D,
HOST1X_CLASS_GR3D = 0x60,
+   HOST1X_CLASS_NVJPG = 0xC0,
HOST1X_CLASS_NVDEC = 0xF0,
HOST1X_CLASS_NVDEC1 = 0xF5,
+   HOST1X_CLASS_OFA = 0xF8,
 };
 
 struct host1x;
-- 
2.42.0



[PATCH 1/5] gpu: host1x: Fix _writel function declarations

2024-04-24 Thread Mikko Perttunen
From: Mikko Perttunen 

Some of the _writel functions in dev.h had the r and v parameters
swapped. Fix this to avoid confusion.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 925a118db23f..53af0334c6e1 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -173,11 +173,11 @@ struct host1x {
 };
 
 void host1x_common_writel(struct host1x *host1x, u32 v, u32 r);
-void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r);
 u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
-void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
+void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r);
 u32 host1x_sync_readl(struct host1x *host1x, u32 r);
-void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v);
+void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r);
 u32 host1x_ch_readl(struct host1x_channel *ch, u32 r);
 
 static inline void host1x_hw_syncpt_restore(struct host1x *host,
-- 
2.42.0



[PATCH 2/5] gpu: host1x: Handle CDMA wraparound when debug printing

2024-04-24 Thread Mikko Perttunen
From: Mikko Perttunen 

During channel debug information dump, when printing CDMA
opcodes, the circular nature of the CDMA pushbuffer wasn't being
taken into account, sometimes accessing past the end. Change
the printing to take this into account.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/debug_hw.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 54e31d81517b..4c32aa1b95e8 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -177,7 +177,16 @@ static void show_gather(struct output *o, dma_addr_t 
phys_addr,
 
for (i = 0; i < words; i++) {
dma_addr_t addr = phys_addr + i * 4;
-   u32 val = *(map_addr + offset / 4 + i);
+   u32 voffset = offset + i * 4;
+   u32 val;
+
+   /* If we reach the RESTART opcode, continue at the beginning of 
pushbuffer */
+   if (cdma && voffset >= cdma->push_buffer.size) {
+   addr -= cdma->push_buffer.size;
+   voffset -= cdma->push_buffer.size;
+   }
+
+   val = *(map_addr + voffset / 4);
 
if (!data_count) {
host1x_debug_output(o, "%pad: %08x: ", , val);
@@ -203,7 +212,7 @@ static void show_channel_gathers(struct output *o, struct 
host1x_cdma *cdma)
job->num_slots, job->num_unpins);
 
show_gather(o, pb->dma + job->first_get, job->num_slots * 2, 
cdma,
-   pb->dma + job->first_get, pb->mapped + 
job->first_get);
+   pb->dma, pb->mapped);
 
for (i = 0; i < job->num_cmds; i++) {
struct host1x_job_gather *g;
@@ -227,7 +236,7 @@ static void show_channel_gathers(struct output *o, struct 
host1x_cdma *cdma)
host1x_debug_output(o, "  GATHER at %pad+%#x, %d 
words\n",
>base, g->offset, g->words);
 
-   show_gather(o, g->base + g->offset, g->words, cdma,
+   show_gather(o, g->base + g->offset, g->words, NULL,
g->base, mapped);
 
if (!job->gather_copy_mapped)
-- 
2.42.0



[PATCH 2/2] drm/tegra: gem: Don't attach dma-bufs when not needed

2024-04-24 Thread Mikko Perttunen
From: Mikko Perttunen 

The dma-buf import code currently attaches and maps all imported
dma-bufs to the drm device to get their sgt for mapping to the
directly managed IOMMU domain.

In many cases, like for newer chips (Tegra186+), the directly
managed IOMMU domain is, however, not used. Mapping to the drm
device can also cause issues e.g. with swiotlb since it is not
a real device.

To improve the situation, only attach and map imported dma-bufs
when required.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/gem.c | 66 -
 drivers/gpu/drm/tegra/gem.h | 21 
 2 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 189861c6c630..d275404ad0e9 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -76,8 +76,8 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct device 
*dev, struct host1x_
/*
 * Imported buffers need special treatment to satisfy the semantics of 
DMA-BUF.
 */
-   if (gem->import_attach) {
-   struct dma_buf *buf = gem->import_attach->dmabuf;
+   if (obj->dma_buf) {
+   struct dma_buf *buf = obj->dma_buf;
 
map->attach = dma_buf_attach(buf, dev);
if (IS_ERR(map->attach)) {
@@ -184,8 +184,8 @@ static void *tegra_bo_mmap(struct host1x_bo *bo)
if (obj->vaddr)
return obj->vaddr;
 
-   if (obj->gem.import_attach) {
-   ret = dma_buf_vmap_unlocked(obj->gem.import_attach->dmabuf, 
);
+   if (obj->dma_buf) {
+   ret = dma_buf_vmap_unlocked(obj->dma_buf, );
if (ret < 0)
return ERR_PTR(ret);
 
@@ -208,8 +208,8 @@ static void tegra_bo_munmap(struct host1x_bo *bo, void 
*addr)
if (obj->vaddr)
return;
 
-   if (obj->gem.import_attach)
-   return dma_buf_vunmap_unlocked(obj->gem.import_attach->dmabuf, 
);
+   if (obj->dma_buf)
+   return dma_buf_vunmap_unlocked(obj->dma_buf, );
 
vunmap(addr);
 }
@@ -465,27 +465,32 @@ static struct tegra_bo *tegra_bo_import(struct drm_device 
*drm,
if (IS_ERR(bo))
return bo;
 
-   attach = dma_buf_attach(buf, drm->dev);
-   if (IS_ERR(attach)) {
-   err = PTR_ERR(attach);
-   goto free;
-   }
-
-   get_dma_buf(buf);
+   /*
+* If we need to use IOMMU API to map the dma-buf into the internally 
managed
+* domain, map it first to the DRM device to get an sgt.
+*/
+   if (tegra->domain) {
+   attach = dma_buf_attach(buf, drm->dev);
+   if (IS_ERR(attach)) {
+   err = PTR_ERR(attach);
+   goto free;
+   }
 
-   bo->sgt = dma_buf_map_attachment_unlocked(attach, DMA_TO_DEVICE);
-   if (IS_ERR(bo->sgt)) {
-   err = PTR_ERR(bo->sgt);
-   goto detach;
-   }
+   bo->sgt = dma_buf_map_attachment_unlocked(attach, 
DMA_TO_DEVICE);
+   if (IS_ERR(bo->sgt)) {
+   err = PTR_ERR(bo->sgt);
+   goto detach;
+   }
 
-   if (tegra->domain) {
err = tegra_bo_iommu_map(tegra, bo);
if (err < 0)
goto detach;
+
+   bo->gem.import_attach = attach;
}
 
-   bo->gem.import_attach = attach;
+   get_dma_buf(buf);
+   bo->dma_buf = buf;
 
return bo;
 
@@ -516,20 +521,21 @@ void tegra_bo_free_object(struct drm_gem_object *gem)
dev_name(mapping->dev));
}
 
-   if (tegra->domain)
+   if (tegra->domain) {
tegra_bo_iommu_unmap(tegra, bo);
 
-   if (gem->import_attach) {
-   struct dma_buf *dmabuf = gem->import_attach->dmabuf;
-
-   dma_buf_unmap_attachment_unlocked(gem->import_attach, bo->sgt,
- DMA_TO_DEVICE);
-   dma_buf_detach(dmabuf, gem->import_attach);
-   dma_buf_put(dmabuf);
-   } else {
-   tegra_bo_free(gem->dev, bo);
+   if (gem->import_attach) {
+   dma_buf_unmap_attachment_unlocked(gem->import_attach, 
bo->sgt,
+ DMA_TO_DEVICE);
+   dma_buf_detach(gem->import_attach->dmabuf, 
gem->import_attach);
+   }
}
 
+   tegra_bo_free(gem->dev, bo);
+
+   if (bo->dma_buf)
+   dma_buf_put(bo->dma_buf);
+
drm_gem_object_release(gem);
kfree(bo);
 }
diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h
index cb5146a67668..bf2cb

[PATCH 1/2] drm/tegra: gem: Open code drm_prime_gem_destroy

2024-04-24 Thread Mikko Perttunen
From: Mikko Perttunen 

Since we aren't using drm_gem_prime_import(_dev), for clarity don't
use the corresponding destroy function either.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/gem.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index b4eb030ea961..189861c6c630 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -520,9 +520,12 @@ void tegra_bo_free_object(struct drm_gem_object *gem)
tegra_bo_iommu_unmap(tegra, bo);
 
if (gem->import_attach) {
+   struct dma_buf *dmabuf = gem->import_attach->dmabuf;
+
dma_buf_unmap_attachment_unlocked(gem->import_attach, bo->sgt,
  DMA_TO_DEVICE);
-   drm_prime_gem_destroy(gem, NULL);
+   dma_buf_detach(dmabuf, gem->import_attach);
+   dma_buf_put(dmabuf);
} else {
tegra_bo_free(gem->dev, bo);
}
-- 
2.42.0



Re: [PATCH] gpu: host1x: Skip reset assert on Tegra186

2024-02-23 Thread Mikko Perttunen

On 2/23/24 03:06, Thierry Reding wrote:

On Thu Feb 22, 2024 at 2:05 AM CET, Mikko Perttunen wrote:

From: Mikko Perttunen 

On Tegra186, secure world applications may need to access host1x
during suspend/resume, and rely on the kernel to keep Host1x out
of reset during the suspend cycle. As such, as a quirk,
skip asserting Host1x's reset on Tegra186.

We don't need to keep the clocks enabled, as BPMP ensures the clock
stays on while Host1x is being used. On newer SoC's, the reset line
is inaccessible, so there is no need for the quirk.

Signed-off-by: Mikko Perttunen 
---
  drivers/gpu/host1x/dev.c | 15 +--
  drivers/gpu/host1x/dev.h |  6 ++
  2 files changed, 15 insertions(+), 6 deletions(-)


Applied to drm-misc-fixes, though I added the Fixes: tag that Jon
mentioned in reply to v1 of this as well as his Reviewed-by and
Tested-by as well, since this is pretty much the same patch except
for the comments.


Thanks. Sorry for messing it up :p

Mikko



Thanks,
Thierry


[PATCH] gpu: host1x: Skip reset assert on Tegra186

2024-02-21 Thread Mikko Perttunen
From: Mikko Perttunen 

On Tegra186, secure world applications may need to access host1x
during suspend/resume, and rely on the kernel to keep Host1x out
of reset during the suspend cycle. As such, as a quirk,
skip asserting Host1x's reset on Tegra186.

We don't need to keep the clocks enabled, as BPMP ensures the clock
stays on while Host1x is being used. On newer SoC's, the reset line
is inaccessible, so there is no need for the quirk.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 15 +--
 drivers/gpu/host1x/dev.h |  6 ++
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 42fd504abbcd..89983d7d73ca 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = {
.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
.sid_table = tegra186_sid_table,
.reserve_vblank_syncpts = false,
+   .skip_reset_assert = true,
 };
 
 static const struct host1x_sid_entry tegra194_sid_table[] = {
@@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct 
device *dev)
host1x_intr_stop(host);
host1x_syncpt_save(host);
 
-   err = reset_control_bulk_assert(host->nresets, host->resets);
-   if (err) {
-   dev_err(dev, "failed to assert reset: %d\n", err);
-   goto resume_host1x;
-   }
+   if (!host->info->skip_reset_assert) {
+   err = reset_control_bulk_assert(host->nresets, host->resets);
+   if (err) {
+   dev_err(dev, "failed to assert reset: %d\n", err);
+   goto resume_host1x;
+   }
 
-   usleep_range(1000, 2000);
+   usleep_range(1000, 2000);
+   }
 
clk_disable_unprepare(host->clk);
reset_control_bulk_release(host->nresets, host->resets);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index c8e302de7625..6143c2a61d70 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -116,6 +116,12 @@ struct host1x_info {
 * the display driver disables VBLANK increments.
 */
bool reserve_vblank_syncpts;
+   /*
+* On T186, secure world applications may require access to host1x
+* during suspend/resume. To allow this, we need to leave host1x
+* not in reset.
+*/
+   bool skip_reset_assert;
 };
 
 struct host1x {
-- 
2.42.0



Re: [PATCH] gpu: host1x: Skip reset assert on Tegra186

2024-02-18 Thread Mikko Perttunen

On 2/16/24 19:02, Thierry Reding wrote:

On Wed Feb 14, 2024 at 12:40 PM CET, Mikko Perttunen wrote:

From: Mikko Perttunen 

On Tegra186, other software components may rely on the kernel to
keep Host1x operational even during suspend. As such, as a quirk,
skip asserting Host1x's reset on Tegra186.


This all sounds a bit vague. What other software components rely on the
kernel to keep host1x operational during suspend? And why do they do so?
Why is this not a problem elsewhere?


My assumption is that it's due to a secure world application accessing 
NVDEC or display engines during suspend or resume. This happening 
without kernel knowledge is a bad thing, but it's hard to change at this 
point.


The reset line (CAR vs BPMP vs non-accessible reset line), and the 
secure application code programming this stuff is slightly different in 
every chip generation, which is where I think the differences happen.


Mikko



Thierry


[PATCH] gpu: host1x: Skip reset assert on Tegra186

2024-02-14 Thread Mikko Perttunen
From: Mikko Perttunen 

On Tegra186, other software components may rely on the kernel to
keep Host1x operational even during suspend. As such, as a quirk,
skip asserting Host1x's reset on Tegra186.

We don't need to keep the clocks enabled, as BPMP ensures the clock
stays on while Host1x is being used. On newer SoC's, the reset line
is inaccessible, so there is no need for the quirk.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 15 +--
 drivers/gpu/host1x/dev.h |  1 +
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 42fd504abbcd..89983d7d73ca 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = {
.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
.sid_table = tegra186_sid_table,
.reserve_vblank_syncpts = false,
+   .skip_reset_assert = true,
 };
 
 static const struct host1x_sid_entry tegra194_sid_table[] = {
@@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct 
device *dev)
host1x_intr_stop(host);
host1x_syncpt_save(host);
 
-   err = reset_control_bulk_assert(host->nresets, host->resets);
-   if (err) {
-   dev_err(dev, "failed to assert reset: %d\n", err);
-   goto resume_host1x;
-   }
+   if (!host->info->skip_reset_assert) {
+   err = reset_control_bulk_assert(host->nresets, host->resets);
+   if (err) {
+   dev_err(dev, "failed to assert reset: %d\n", err);
+   goto resume_host1x;
+   }
 
-   usleep_range(1000, 2000);
+   usleep_range(1000, 2000);
+   }
 
clk_disable_unprepare(host->clk);
reset_control_bulk_release(host->nresets, host->resets);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index c8e302de7625..9c13e71a31ff 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -116,6 +116,7 @@ struct host1x_info {
 * the display driver disables VBLANK increments.
 */
bool reserve_vblank_syncpts;
+   bool skip_reset_assert;
 };
 
 struct host1x {
-- 
2.42.0



[PATCH 2/2] drm/tegra: Zero-initialize iosys_map

2023-09-01 Thread Mikko Perttunen
From: Mikko Perttunen 

UBSAN reports an invalid load for bool, as the iosys_map is read
later without being initialized. Zero-initialize it to avoid this.

Reported-by: Ashish Mhetre 
Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index a4023163493d..346f70edfb15 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -177,7 +177,7 @@ static void tegra_bo_unpin(struct host1x_bo_mapping *map)
 static void *tegra_bo_mmap(struct host1x_bo *bo)
 {
struct tegra_bo *obj = host1x_to_tegra_bo(bo);
-   struct iosys_map map;
+   struct iosys_map map = {0};
int ret;
 
if (obj->vaddr) {
-- 
2.41.0



[PATCH 1/2] gpu: host1x: Correct allocated size for contexts

2023-09-01 Thread Mikko Perttunen
From: Johnny Liu 

The original implementation over-allocates memory for the contexts
list. The amount of memory needed for the contexts list is based
on the number of IOMMU groups specified in the device tree.

Fixes: 8aa5bcb61612 ("gpu: host1x: Add context device management code")
Signed-off-by: Johnny Liu 
Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
index a3f336edd991..955c971c528d 100644
--- a/drivers/gpu/host1x/context.c
+++ b/drivers/gpu/host1x/context.c
@@ -34,10 +34,10 @@ int host1x_memory_context_list_init(struct host1x *host1x)
if (err < 0)
return 0;
 
-   cdl->devs = kcalloc(err, sizeof(*cdl->devs), GFP_KERNEL);
+   cdl->len = err / 4;
+   cdl->devs = kcalloc(cdl->len, sizeof(*cdl->devs), GFP_KERNEL);
if (!cdl->devs)
return -ENOMEM;
-   cdl->len = err / 4;
 
for (i = 0; i < cdl->len; i++) {
ctx = >devs[i];
-- 
2.41.0



[PATCH] gpu: host1x: Syncpoint interrupt sharding

2023-09-01 Thread Mikko Perttunen
From: Mikko Perttunen 

Support sharded syncpoint interrupts on Tegra234+. This feature
allows specifying one of eight interrupt lines for each syncpoint
to lower processing latency of syncpoint threshold
interrupts.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c| 28 +---
 drivers/gpu/host1x/dev.h|  3 ++-
 drivers/gpu/host1x/hw/intr_hw.c | 46 -
 3 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 7c6699aed7d2..b22821c81394 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -488,7 +488,7 @@ static int host1x_get_resets(struct host1x *host)
 static int host1x_probe(struct platform_device *pdev)
 {
struct host1x *host;
-   int err;
+   int err, i;
 
host = devm_kzalloc(>dev, sizeof(*host), GFP_KERNEL);
if (!host)
@@ -516,9 +516,29 @@ static int host1x_probe(struct platform_device *pdev)
return PTR_ERR(host->regs);
}
 
-   host->syncpt_irq = platform_get_irq(pdev, 0);
-   if (host->syncpt_irq < 0)
-   return host->syncpt_irq;
+   for (i = 0; i < ARRAY_SIZE(host->syncpt_irqs); i++) {
+   char irq_name[] = "syncptX";
+   sprintf(irq_name, "syncpt%d", i);
+
+   err = platform_get_irq_byname_optional(pdev, irq_name);
+   if (err == -ENXIO)
+   break;
+   if (err < 0)
+   return err;
+
+   host->syncpt_irqs[i] = err;
+   }
+
+   host->num_syncpt_irqs = i;
+
+   /* Device tree without irq names */
+   if (i == 0) {
+   host->syncpt_irqs[0] = platform_get_irq(pdev, 0);
+   if (host->syncpt_irqs[0] < 0)
+   return host->syncpt_irqs[0];
+
+   host->num_syncpt_irqs = 1;
+   }
 
mutex_init(>devices_lock);
INIT_LIST_HEAD(>devices);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 75de50fe03d0..c8e302de7625 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -124,7 +124,8 @@ struct host1x {
void __iomem *regs;
void __iomem *hv_regs; /* hypervisor region */
void __iomem *common_regs;
-   int syncpt_irq;
+   int syncpt_irqs[8];
+   int num_syncpt_irqs;
struct host1x_syncpt *syncpt;
struct host1x_syncpt_base *bases;
struct device *dev;
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index b915ef7d0348..9880e0c47235 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -13,13 +13,20 @@
 #include "../intr.h"
 #include "../dev.h"
 
+struct host1x_intr_irq_data {
+   struct host1x *host;
+   u32 offset;
+};
+
 static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 {
-   struct host1x *host = dev_id;
+   struct host1x_intr_irq_data *irq_data = dev_id;
+   struct host1x *host = irq_data->host;
unsigned long reg;
unsigned int i, id;
 
-   for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
+   for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32);
+i += host->num_syncpt_irqs) {
reg = host1x_sync_readl(host,
HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
 
@@ -67,26 +74,41 @@ static void intr_hw_init(struct host1x *host, u32 cpm)
 
/*
 * Program threshold interrupt destination among 8 lines per VM,
-* per syncpoint. For now, just direct all to the first interrupt
-* line.
+* per syncpoint. For each group of 32 syncpoints (corresponding to one
+* interrupt status register), direct to one interrupt line, going
+* around in a round robin fashion.
 */
-   for (id = 0; id < host->info->nb_pts; id++)
-   host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+   for (id = 0; id < host->info->nb_pts; id++) {
+   u32 reg_offset = id / 32;
+   u32 irq_index = reg_offset % host->num_syncpt_irqs;
+
+   host1x_sync_writel(host, irq_index, 
HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+   }
 #endif
 }
 
 static int
 host1x_intr_init_host_sync(struct host1x *host, u32 cpm)
 {
-   int err;
+   int err, i;
+   struct host1x_intr_irq_data *irq_data;
+
+   irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, 
sizeof(irq_data[0]), GFP_KERNEL);
+   if (!irq_data)
+   return -ENOMEM;
 
host1x_hw_intr_disable_all_syncpt_intrs(host);
 
-   err = devm_request_irq(host->dev, host->syncpt_irq,
-  syncpt_thresh_isr, IRQF_SHARED,
-  "host1x

[PATCH 2/3] gpu: host1x: Stop CDMA before suspending

2023-09-01 Thread Mikko Perttunen
From: Mikko Perttunen 

Before going into suspend, wait for all CDMA to go idle and stop it.
This ensures that no channel is still active while we enter
suspend, and that the driver doesn't think that CDMA is still
active when coming back from suspend (as HW state has been reset).

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/channel.c | 19 +++
 drivers/gpu/host1x/channel.h |  1 +
 drivers/gpu/host1x/dev.c |  1 +
 3 files changed, 21 insertions(+)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 79501c957532..08077afe4cde 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -81,6 +81,25 @@ void host1x_channel_stop(struct host1x_channel *channel)
 }
 EXPORT_SYMBOL(host1x_channel_stop);
 
+/**
+ * host1x_channel_stop_all() - disable CDMA on allocated channels
+ * @host: host1x instance
+ *
+ * Stop CDMA on allocated channels
+ */
+void host1x_channel_stop_all(struct host1x *host)
+{
+   struct host1x_channel_list *chlist = >channel_list;
+   int bit;
+
+   mutex_lock(>lock);
+
+   for_each_set_bit(bit, chlist->allocated_channels, 
host->info->nb_channels)
+   host1x_channel_stop(>channels[bit]);
+
+   mutex_unlock(>lock);
+}
+
 static void release_channel(struct kref *kref)
 {
struct host1x_channel *channel =
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index b23a8071fbd0..d7aede204d83 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -40,5 +40,6 @@ int host1x_channel_list_init(struct host1x_channel_list 
*chlist,
 void host1x_channel_list_free(struct host1x_channel_list *chlist);
 struct host1x_channel *host1x_channel_get_index(struct host1x *host,
unsigned int index);
+void host1x_channel_stop_all(struct host1x *host);
 
 #endif
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 7c6699aed7d2..6501bee9e8c1 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -655,6 +655,7 @@ static int __maybe_unused host1x_runtime_suspend(struct 
device *dev)
struct host1x *host = dev_get_drvdata(dev);
int err;
 
+   host1x_channel_stop_all(host);
host1x_intr_stop(host);
host1x_syncpt_save(host);
 
-- 
2.41.0



[PATCH 3/3] gpu: host1x: Enable system suspend callbacks

2023-09-01 Thread Mikko Perttunen
From: Mikko Perttunen 

With the previous CDMA stop fix, executing runtime PM ops around
system suspend now makes channel submissions work after system
suspend, so do that.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 6501bee9e8c1..b8ac44e7d11a 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -720,7 +720,7 @@ static int __maybe_unused host1x_runtime_resume(struct 
device *dev)
 static const struct dev_pm_ops host1x_pm_ops = {
SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
   NULL)
-   /* TODO: add system suspend-resume once driver will be ready for that */
+   SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, 
pm_runtime_force_resume)
 };
 
 static struct platform_driver tegra_host1x_driver = {
-- 
2.41.0



[PATCH 1/3] gpu: host1x: Add locking in channel allocation

2023-09-01 Thread Mikko Perttunen
From: Mikko Perttunen 

Add locking around channel allocation to avoid race conditions.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/channel.c | 7 +++
 drivers/gpu/host1x/channel.h | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 2d0051d6314c..79501c957532 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -27,6 +27,8 @@ int host1x_channel_list_init(struct host1x_channel_list 
*chlist,
return -ENOMEM;
}
 
+   mutex_init(>lock);
+
return 0;
 }
 
@@ -104,8 +106,11 @@ static struct host1x_channel 
*acquire_unused_channel(struct host1x *host)
unsigned int max_channels = host->info->nb_channels;
unsigned int index;
 
+   mutex_lock(&chlist->lock);
+
index = find_first_zero_bit(chlist->allocated_channels, max_channels);
if (index >= max_channels) {
+   mutex_unlock(&chlist->lock);
dev_err(host->dev, "failed to find free channel\n");
return NULL;
}
@@ -114,6 +119,8 @@ static struct host1x_channel *acquire_unused_channel(struct 
host1x *host)
 
set_bit(index, chlist->allocated_channels);
 
+   mutex_unlock(&chlist->lock);
+
return &chlist->channels[index];
 }
 
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 39044ff6c3aa..b23a8071fbd0 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -10,6 +10,7 @@
 
 #include 
 #include 
+#include <linux/mutex.h>
 
 #include "cdma.h"
 
@@ -18,6 +19,8 @@ struct host1x_channel;
 
 struct host1x_channel_list {
struct host1x_channel *channels;
+
+   struct mutex lock;
unsigned long *allocated_channels;
 };
 
-- 
2.41.0



Re: [PATCH] dma_buf/sync_file: Enable signaling for fences when querying status

2023-09-01 Thread Mikko Perttunen

On 7/25/23 10:46, Mikko Perttunen wrote:

From: Mikko Perttunen 

dma_fence_get_status is not guaranteed to return valid information
about whether the fence has been signaled if SW signaling has not
been enabled for the fence. To ensure valid information is reported,
enable SW signaling for fences before getting their status.

Signed-off-by: Mikko Perttunen 
---
  drivers/dma-buf/sync_file.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86ce..57f194b8477f 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -267,6 +267,7 @@ static int sync_fill_fence_info(struct dma_fence *fence,
strscpy(info->driver_name, fence->ops->get_driver_name(fence),
sizeof(info->driver_name));
  
+	dma_fence_enable_sw_signaling(fence);

info->status = dma_fence_get_status(fence);
while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, >flags) &&
   !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, >flags))
@@ -307,6 +308,7 @@ static long sync_file_ioctl_fence_info(struct sync_file 
*sync_file,
 * info->num_fences.
 */
if (!info.num_fences) {
+   dma_fence_enable_sw_signaling(sync_file->fence);
info.status = dma_fence_get_status(sync_file->fence);
goto no_fences;
} else {


Any thoughts?

Mikko


Re: [PATCH] drm/tegra: Remove existing framebuffer only if we support display

2023-08-31 Thread Mikko Perttunen

On 8/30/23 13:19, Thomas Zimmermann wrote:

Hi

Am 25.08.23 um 15:22 schrieb Thierry Reding:

From: Thierry Reding 

Tegra DRM doesn't support display on Tegra234 and later, so make sure
not to remove any existing framebuffers in that case.

Signed-off-by: Thierry Reding 
---
  drivers/gpu/drm/tegra/drm.c | 8 +---
  1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index b1e1a78e30c6..7a38dadbc264 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1220,9 +1220,11 @@ static int host1x_drm_probe(struct 
host1x_device *dev)

  drm_mode_config_reset(drm);
-    err = drm_aperture_remove_framebuffers(_drm_driver);
-    if (err < 0)
-    goto hub;
+    if (drm->mode_config.num_crtc > 0) {


If you don't support the hardware, wouldn't it be better to return 
-ENODEV if !num_crtc?


While display is not supported through TegraDRM on Tegra234+, certain 
multimedia accelerators are supported, so we need to finish probe for those.


Cheers,
Mikko



Best regards
Thomas


+    err = drm_aperture_remove_framebuffers(_drm_driver);
+    if (err < 0)
+    goto hub;
+    }
  err = drm_dev_register(drm, 0);
  if (err < 0)






Re: [PATCH -next 2/2] drm/tegra: Use PTR_ERR_OR_ZERO() to simplify code

2023-08-22 Thread Mikko Perttunen

On 8/22/23 10:15, Jinjie Ruan wrote:

Return PTR_ERR_OR_ZERO() instead of return 0 or PTR_ERR() to
simplify code.

Signed-off-by: Jinjie Ruan 
---
  drivers/gpu/drm/tegra/drm.c | 5 +
  drivers/gpu/drm/tegra/gem.c | 5 +
  2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index ff36171c8fb7..4e29d76da1be 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -354,10 +354,7 @@ static int tegra_gem_create(struct drm_device *drm, void 
*data,
  
  	bo = tegra_bo_create_with_handle(file, drm, args->size, args->flags,

 >handle);
-   if (IS_ERR(bo))
-   return PTR_ERR(bo);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(bo);
  }
  
  static int tegra_gem_mmap(struct drm_device *drm, void *data,

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index a4023163493d..11ef0f8cb1e1 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -533,10 +533,7 @@ int tegra_bo_dumb_create(struct drm_file *file, struct 
drm_device *drm,
  
  	bo = tegra_bo_create_with_handle(file, drm, args->size, 0,

 >handle);
-   if (IS_ERR(bo))
-   return PTR_ERR(bo);
-
-   return 0;
+   return PTR_ERR_OR_ZERO(bo);
  }
  
  static vm_fault_t tegra_bo_fault(struct vm_fault *vmf)


NAK. See 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20230822=b784c77075023e1a71bc06e6b4f711acb99e9c73


Mikko


[PATCH] dma_buf/sync_file: Enable signaling for fences when querying status

2023-07-25 Thread Mikko Perttunen
From: Mikko Perttunen 

dma_fence_get_status is not guaranteed to return valid information
about whether the fence has been signaled if SW signaling has not
been enabled for the fence. To ensure valid information is reported,
enable SW signaling for fences before getting their status.

Signed-off-by: Mikko Perttunen 
---
 drivers/dma-buf/sync_file.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index af57799c86ce..57f194b8477f 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -267,6 +267,7 @@ static int sync_fill_fence_info(struct dma_fence *fence,
strscpy(info->driver_name, fence->ops->get_driver_name(fence),
sizeof(info->driver_name));
 
+   dma_fence_enable_sw_signaling(fence);
info->status = dma_fence_get_status(fence);
while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
   !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags))
@@ -307,6 +308,7 @@ static long sync_file_ioctl_fence_info(struct sync_file 
*sync_file,
 * info->num_fences.
 */
if (!info.num_fences) {
+   dma_fence_enable_sw_signaling(sync_file->fence);
info.status = dma_fence_get_status(sync_file->fence);
goto no_fences;
} else {
-- 
2.40.1



[PATCH] drm/tegra: Enable runtime PM during probe

2023-06-13 Thread Mikko Perttunen
From: Mikko Perttunen 

Currently, engine drivers only enable runtime PM during the host1x
init callback. This can happen slightly later than the probe, which
can cause the power domain to intermittently not be turned off after
probe.

My hypothesis is that there is a race condition between the post-probe
power domain poweroff that is done from a queued work, and the
pm_runtime_enable call happening in the host1x init callback.
If the pm_runtime_enable call happens first, everything is OK and
the power off work can disable the power domain as PM runtime is
enabled and the device is runtime suspended. If power off work runs
first, PM runtime is still disabled for the device and the domain
must be kept powered.

Resolve the issue by moving the runtime PM enablement to the
probe function.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/gr2d.c  | 16 +++-
 drivers/gpu/drm/tegra/gr3d.c  | 16 +++-
 drivers/gpu/drm/tegra/nvdec.c | 17 +++--
 drivers/gpu/drm/tegra/vic.c   | 17 +++--
 4 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 50f77fddda54..56468268df21 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -70,22 +70,15 @@ static int gr2d_init(struct host1x_client *client)
goto free;
}
 
-   pm_runtime_enable(client->dev);
-   pm_runtime_use_autosuspend(client->dev);
-   pm_runtime_set_autosuspend_delay(client->dev, 200);
-
err = tegra_drm_register_client(dev->dev_private, drm);
if (err < 0) {
dev_err(client->dev, "failed to register client: %d\n", err);
-   goto disable_rpm;
+   goto detach_iommu;
}
 
return 0;
 
-disable_rpm:
-   pm_runtime_dont_use_autosuspend(client->dev);
-   pm_runtime_force_suspend(client->dev);
-
+detach_iommu:
host1x_client_iommu_detach(client);
 free:
host1x_syncpt_put(client->syncpts[0]);
@@ -299,6 +292,7 @@ static void gr2d_remove(struct platform_device *pdev)
 {
struct gr2d *gr2d = platform_get_drvdata(pdev);
 
+   pm_runtime_disable(>dev);
host1x_client_unregister(>client.base);
 }
 
@@ -372,6 +366,10 @@ static int __maybe_unused gr2d_runtime_resume(struct 
device *dev)
goto disable_clk;
}
 
+   pm_runtime_enable(dev);
+   pm_runtime_use_autosuspend(dev);
+   pm_runtime_set_autosuspend_delay(dev, 500);
+
return 0;
 
 disable_clk:
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index c026c2c916c1..46d213620b70 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -80,22 +80,15 @@ static int gr3d_init(struct host1x_client *client)
goto free;
}
 
-   pm_runtime_enable(client->dev);
-   pm_runtime_use_autosuspend(client->dev);
-   pm_runtime_set_autosuspend_delay(client->dev, 200);
-
err = tegra_drm_register_client(dev->dev_private, drm);
if (err < 0) {
dev_err(client->dev, "failed to register client: %d\n", err);
-   goto disable_rpm;
+   goto detach_iommu;
}
 
return 0;
 
-disable_rpm:
-   pm_runtime_dont_use_autosuspend(client->dev);
-   pm_runtime_force_suspend(client->dev);
-
+detach_iommu:
host1x_client_iommu_detach(client);
 free:
host1x_syncpt_put(client->syncpts[0]);
@@ -554,6 +547,7 @@ static void gr3d_remove(struct platform_device *pdev)
 {
struct gr3d *gr3d = platform_get_drvdata(pdev);
 
+   pm_runtime_disable(>dev);
host1x_client_unregister(>client.base);
 }
 
@@ -607,6 +601,10 @@ static int __maybe_unused gr3d_runtime_resume(struct 
device *dev)
goto disable_clk;
}
 
+   pm_runtime_enable(dev);
+   pm_runtime_use_autosuspend(dev);
+   pm_runtime_set_autosuspend_delay(dev, 500);
+
return 0;
 
 disable_clk:
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index ae78a81e5eef..2bc75f4d539f 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -177,13 +177,9 @@ static int nvdec_init(struct host1x_client *client)
goto free_channel;
}
 
-   pm_runtime_enable(client->dev);
-   pm_runtime_use_autosuspend(client->dev);
-   pm_runtime_set_autosuspend_delay(client->dev, 500);
-
err = tegra_drm_register_client(tegra, drm);
if (err < 0)
-   goto disable_rpm;
+   goto free_syncpt;
 
/*
 * Inherit the DMA parameters (such as maximum segment size) from the
@@ -193,10 +189,7 @@ static int nvdec_init(struct host1x_client *client)
 
return 0;
 
-disable_rpm:
-   pm_runtime_dont_use_autosuspend(client->dev);
-   pm_runtime_for

[PATCH 1/2] drm/tegra: Add error check for NVDEC firmware memory allocation

2023-04-13 Thread Mikko Perttunen
From: Mikko Perttunen 

The return value for tegra_drm_alloc was missing an error check.
Add one.

Reported-by: Dan Carpenter 
Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/nvdec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index ae78a81e5eef..15ce5e89fad4 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -276,6 +276,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
return err;
} else {
virt = tegra_drm_alloc(tegra, size, );
+   if (IS_ERR(virt))
+   return PTR_ERR(virt);
}
 
nvdec->falcon.firmware.virt = virt;
-- 
2.39.2



[PATCH 2/2] gpu: host1x: Return error when context device not attached to IOMMU

2023-04-13 Thread Mikko Perttunen
From: Mikko Perttunen 

If a context device was not attached to IOMMU, we kept the old
success err value causing context devices to be unregistered but
success to be returned. This would mean that things would go on
but with context isolation disabled.

To decide on an explicit behavior, let's return an error code
here instead. If someone wants to go without IOMMU on a platform
modern enough to support context isolation, they can remove the
context devices from device tree.

Reported-by: Dan Carpenter 
Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/context.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
index 9ad89d22c0ca..c45c4d4e6c1a 100644
--- a/drivers/gpu/host1x/context.c
+++ b/drivers/gpu/host1x/context.c
@@ -79,6 +79,14 @@ int host1x_memory_context_list_init(struct host1x *host1x)
!device_iommu_mapped(>dev)) {
dev_err(host1x->dev, "Context device %d has no 
IOMMU!\n", i);
device_unregister(>dev);
+
+   /*
+* This means that if IOMMU is disabled but context 
devices
+* are defined in the device tree, Host1x will fail to 
probe.
+* That's probably OK in this time and age.
+*/
+   err = -EINVAL;
+
goto unreg_devices;
}
}
-- 
2.39.2



Re: [GIT PULL] drm/tegra: Changes for v6.4-rc1

2023-04-06 Thread Mikko Perttunen

On 4/6/23 16:09, Daniel Vetter wrote:

On Thu, Apr 06, 2023 at 02:14:04PM +0200, Thierry Reding wrote:

Hi Dave, Daniel,

The following changes since commit e8d018dd0257f744ca50a729e3d042cf2ec9da65:

   Linux 6.3-rc3 (2023-03-19 13:27:55 -0700)

are available in the Git repository at:

   https://gitlab.freedesktop.org/drm/tegra.git tags/drm/tegra/for-6.4-rc1

for you to fetch changes up to 2429b3c529da29d4277d519bd66d034842dcd70c:

   drm/tegra: Avoid potential 32-bit integer overflow (2023-04-06 14:02:33 
+0200)

Thanks,
Thierry


drm/tegra: Changes for v6.4-rc1

The majority of this is minor cleanups and fixes. Other than those, this
contains Uwe's conversion to the new driver remove callback and Thomas'
fbdev DRM client conversion. The driver can now also be built on other
architectures to ease compile coverage.


Neat cleanup on top might be to look at the generic fbdev stuff, just as
an idea.


Finally, this adds Mikko as a second maintainer for the driver. As a
next step we also want Tegra DRM to move into drm-misc to streamline the
maintenance process.


Amusingly the one patch that dim flagged as lacking a 2nd set of eyes (no
a-b/rb or committer!=author) is the MAINTAINERS patch, would have been
good to record Mikko's ack for getting volunteered :-)


Haha, admittedly I was a bit surprised to see myself being added to 
MAINTAINERS so quickly after talking about it with Thierry; but yes, I 
submit myself to the duty :)


Cheers,
Mikko



Anyway all good, pulled, thanks!




Cai Huoqing (3):
   drm/tegra: sor: Make use of the helper function dev_err_probe()
   drm/tegra: dsi: Make use of the helper function dev_err_probe()
   drm/tegra: plane: Improve use of dev_err_probe()

Christian König (2):
   drm/tegra: Allow compile test on !ARM v2
   drm/tegra: Fix another missing include

Deepak R Varma (1):
   drm/tegra: sor: Remove redundant error logging

Diogo Ivo (1):
   drm/tegra: dsi: Clear enable register if powered by bootloader

Lee Jones (1):
   drm/tegra: dc: Remove set but unused variable 'state'

Mikko Perttunen (1):
   gpu: host1x: Don't rely on dma_fence_wait_timeout return value

Nur Hussein (1):
   drm/tegra: Avoid potential 32-bit integer overflow

Thierry Reding (1):
   MAINTAINERS: Add Mikko as backup maintainer for Tegra DRM

Thomas Zimmermann (7):
   drm/tegra: Include 
   drm/tegra: Include 
   drm/tegra: Removed fb from struct tegra_fbdev
   drm/tegra: Remove struct tegra_fbdev
   drm/tegra: Hide fbdev support behind config option
   drm/tegra: Initialize fbdev DRM client
   drm/tegra: Implement fbdev emulation as in-kernel client

Uwe Kleine-König (12):
   gpu: host1x: Make host1x_client_unregister() return void
   drm/tegra: rgb: Make tegra_dc_rgb_remove() return void
   drm/tegra: dc: Convert to platform remove callback returning void
   drm/tegra: dpaux: Convert to platform remove callback returning void
   drm/tegra: dsi: Convert to platform remove callback returning void
   drm/tegra: gr2d: Convert to platform remove callback returning void
   drm/tegra: gr3d: Convert to platform remove callback returning void
   drm/tegra: hdmi: Convert to platform remove callback returning void
   drm/tegra: hub: Convert to platform remove callback returning void
   drm/tegra: nvdec: Convert to platform remove callback returning void
   drm/tegra: sor: Convert to platform remove callback returning void
   drm/tegra: vic: Convert to platform remove callback returning void

Yang Yingliang (2):
   gpu: host1x: Fix potential double free if IOMMU is disabled
   gpu: host1x: Fix memory leak of device names

Ye Xingchen (1):
   gpu: host1x: mipi: Use devm_platform_get_and_ioremap_resource()

  MAINTAINERS |   1 +
  drivers/gpu/drm/tegra/Kconfig   |   2 +-
  drivers/gpu/drm/tegra/Makefile  |   2 +
  drivers/gpu/drm/tegra/dc.c  |  22 +--
  drivers/gpu/drm/tegra/dc.h  |   2 +-
  drivers/gpu/drm/tegra/dpaux.c   |   6 +-
  drivers/gpu/drm/tegra/drm.c |  23 +--
  drivers/gpu/drm/tegra/drm.h |  27 ++--
  drivers/gpu/drm/tegra/dsi.c |  51 ---
  drivers/gpu/drm/tegra/fb.c  | 242 +---
  drivers/gpu/drm/tegra/fbdev.c   | 241 +++
  drivers/gpu/drm/tegra/gem.c |   1 +
  drivers/gpu/drm/tegra/gr2d.c|  14 +-
  drivers/gpu/drm/tegra/gr3d.c|  14 +-
  drivers/gpu/drm/tegra/hdmi.c|  14 +-
  drivers/gpu/drm/tegra/hub.c |  13 +-
  drivers/gpu/drm/tegra/nvdec.c   |  14 +-
  drivers/gpu/drm/tegra/output.c  |   3 +
  drivers/gpu/drm/tegra/plane.c   |  16 +--
  drivers/gpu/drm/tegra/rgb.c

[PATCH] gpu: host1x: Don't rely on dma_fence_wait_timeout return value

2023-03-01 Thread Mikko Perttunen
From: Mikko Perttunen 

dma_fence_wait_timeout (along with a host of other jiffies-based
timeouting functions) returns zero both in case of timeout and when
the wait completes during the last jiffy before timeout. As such,
we can't rely on it to distinguish between success and timeout.

To prevent confusing callers by returning -EAGAIN before the timeout
period has elapsed, check if the fence got signaled again after
the wait.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/syncpt.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 2d2007760eac..f63d14a57a1d 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -248,7 +248,13 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 
thresh, long timeout,
if (value)
*value = host1x_syncpt_load(sp);
 
-   if (wait_err == 0)
+   /*
+* Don't rely on dma_fence_wait_timeout return value,
+* since it returns zero both on timeout and if the
+* wait completed with 0 jiffies left.
+*/
+   host1x_hw_syncpt_load(sp->host, sp);
+   if (wait_err == 0 && !host1x_syncpt_is_expired(sp, thresh))
return -EAGAIN;
else if (wait_err < 0)
return wait_err;
-- 
2.39.2



Re: [PATCH] Don't test for NULL firmware before releasing

2023-02-17 Thread Mikko Perttunen

On 2/16/23 15:19, Stanislaw Gruszka wrote:

Hi

On Thu, Feb 16, 2023 at 02:37:15AM +0100, Jesper Juhl wrote:

 From 4fe34831e2e7677b1c9616356f0a2e0a36ec092f Mon Sep 17 00:00:00 2001
From: Jesper Juhl 
Date: Thu, 16 Feb 2023 02:33:05 +0100
Subject: [PATCH] Don't test for NULL firmware before releasing

release_firmware() tests for a NULL pointer itself, no need to do it up-front.

Signed-off-by: Jesper Juhl 

---
  drivers/gpu/drm/tegra/falcon.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index c0d85463eb1a..ae599441f031 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -153,8 +153,7 @@ int falcon_init(struct falcon *falcon)

  void falcon_exit(struct falcon *falcon)
  {
-   if (falcon->firmware.firmware)
-   release_firmware(falcon->firmware.firmware);
+   release_firmware(falcon->firmware.firmware);


Please check patches with checkpatch.pl before posting.

Regards
Stanislaw



Aside from the formatting deficiencies, I'm also not in favor of relying on 
NULL checks inside callees since doing so removes contextual information 
from the programmer; just looking at the code, it is easy to assume the 
pointer cannot be NULL if there is no NULL check. Recently had a longer 
thread about this in the context of kfree in TegraDRM.


Thanks
Mikko


Re: [PATCH] gpu: host1x: fix uninitialized variable use

2023-01-28 Thread Mikko Perttunen

On 1/28/23 01:18, Nathan Chancellor wrote:

On Fri, Jan 27, 2023 at 11:14:00PM +0100, Arnd Bergmann wrote:

From: Arnd Bergmann 

The error handling for platform_get_irq() failing no longer
works after a recent change, clang now points this out with
a warning:

drivers/gpu/host1x/dev.c:520:6: error: variable 'syncpt_irq' is uninitialized 
when used here [-Werror,-Wuninitialized]
 if (syncpt_irq < 0)
 ^~

Fix this by removing the variable and checking the correct
error status.

Fixes: 625d4ffb438c ("gpu: host1x: Rewrite syncpoint interrupt handling")
Signed-off-by: Arnd Bergmann 


I had the same diff pending but civic duty called today :)

Reviewed-by: Nathan Chancellor 


---
  drivers/gpu/host1x/dev.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 4872d183d860..aae2efeef503 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -487,7 +487,6 @@ static int host1x_get_resets(struct host1x *host)
  static int host1x_probe(struct platform_device *pdev)
  {
struct host1x *host;
-   int syncpt_irq;
int err;
  
  	host = devm_kzalloc(>dev, sizeof(*host), GFP_KERNEL);

@@ -517,8 +516,8 @@ static int host1x_probe(struct platform_device *pdev)
}
  
  	host->syncpt_irq = platform_get_irq(pdev, 0);

-   if (syncpt_irq < 0)
-   return syncpt_irq;
+   if (host->syncpt_irq < 0)
+   return host->syncpt_irq;
  
  	mutex_init(>devices_lock);

INIT_LIST_HEAD(>devices);
--
2.39.0



Thanks both for the fix :)

FWIW,

Reviewed-by: Mikko Perttunen 


[PATCH 3/4] gpu: host1x: Don't skip assigning syncpoints to channels

2023-01-19 Thread Mikko Perttunen
From: Mikko Perttunen 

The code to write the syncpoint channel assignment register
incorrectly skips the write if hypervisor registers are not available.

The register, however, is within the guest aperture so remove the
check and assign syncpoints properly even on virtualized systems.

Fixes: c3f52220f276 ("gpu: host1x: Enable Tegra186 syncpoint protection")
Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/syncpt_hw.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c 
b/drivers/gpu/host1x/hw/syncpt_hw.c
index dd39d67ccec3..8cf35b2eff3d 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,9 +106,6 @@ static void syncpt_assign_to_channel(struct host1x_syncpt 
*sp,
 #if HOST1X_HW >= 6
struct host1x *host = sp->host;
 
-   if (!host->hv_regs)
-   return;
-
host1x_sync_writel(host,
   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
-- 
2.39.0



[PATCH 4/4] drm/tegra: firewall: Check for is_addr_reg existence in IMM check

2023-01-19 Thread Mikko Perttunen
From: Mikko Perttunen 

In the IMM opcode check, don't call is_addr_reg if it's not set.

Fixes: 8cc95f3fd35e ("drm/tegra: Add job firewall")
Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/firewall.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/tegra/firewall.c b/drivers/gpu/drm/tegra/firewall.c
index 1824d2db0e2c..d53f890fa689 100644
--- a/drivers/gpu/drm/tegra/firewall.c
+++ b/drivers/gpu/drm/tegra/firewall.c
@@ -97,6 +97,9 @@ static int fw_check_regs_imm(struct tegra_drm_firewall *fw, 
u32 offset)
 {
bool is_addr;
 
+   if (!fw->client->ops->is_addr_reg)
+   return 0;
+
is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class,
   offset);
if (is_addr)
-- 
2.39.0



[PATCH 1/4] arm64: tegra: Mark host1x as dma-coherent on Tegra194/234

2023-01-19 Thread Mikko Perttunen
From: Mikko Perttunen 

Ensure appropriate configuration is done to make the host1x device
and context devices DMA coherent by adding the dma-coherent flag.

Fixes: b35f5b53a87b ("arm64: tegra: Add context isolation domains on Tegra234")
Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi | 1 +
 arch/arm64/boot/dts/nvidia/tegra234.dtsi | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index 4afcbd60e144..d8169920b33b 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1918,6 +1918,7 @@ host1x@13e0 {
interconnects = < TEGRA194_MEMORY_CLIENT_HOST1XDMAR 
>;
interconnect-names = "dma-mem";
iommus = < TEGRA194_SID_HOST1X>;
+   dma-coherent;
 
/* Context isolation domains */
iommu-map = <0  TEGRA194_SID_HOST1X_CTX0 1>,
diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
index eaf05ee9acd1..77ceed615b7f 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
@@ -571,6 +571,7 @@ host1x@13e0 {
interconnects = < TEGRA234_MEMORY_CLIENT_HOST1XDMAR 
>;
interconnect-names = "dma-mem";
iommus = <_niso1 TEGRA234_SID_HOST1X>;
+   dma-coherent;
 
/* Context isolation domains */
iommu-map = <0 _niso0 TEGRA234_SID_HOST1X_CTX0 1>,
-- 
2.39.0



[PATCH 2/4] gpu: host1x: Fix mask for syncpoint increment register

2023-01-19 Thread Mikko Perttunen
From: Mikko Perttunen 

On Tegra186+, the syncpoint ID has 10 bits of space. To allow
using more than 256 syncpoints, fix the mask.

Fixes: 9abdd497cd0a ("gpu: host1x: Tegra234 device data and headers")
Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/hw_host1x06_uclass.h | 2 +-
 drivers/gpu/host1x/hw/hw_host1x07_uclass.h | 2 +-
 drivers/gpu/host1x/hw/hw_host1x08_uclass.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h 
b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
index 5f831438d19b..50c32de452fb 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
host1x_uclass_incr_syncpt_cond_f(v)
 static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
 {
-   return (v & 0xff) << 0;
+   return (v & 0x3ff) << 0;
 }
 #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
host1x_uclass_incr_syncpt_indx_f(v)
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h 
b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
index 8cd2ef087d5d..887b878f92f7 100644
--- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
host1x_uclass_incr_syncpt_cond_f(v)
 static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
 {
-   return (v & 0xff) << 0;
+   return (v & 0x3ff) << 0;
 }
 #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
host1x_uclass_incr_syncpt_indx_f(v)
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h 
b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
index 724cccd71aa1..4fb1d090edae 100644
--- a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
host1x_uclass_incr_syncpt_cond_f(v)
 static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
 {
-   return (v & 0xff) << 0;
+   return (v & 0x3ff) << 0;
 }
 #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
host1x_uclass_incr_syncpt_indx_f(v)
-- 
2.39.0



[PATCH 2/4] gpu: host1x: Implement job tracking using DMA fences

2023-01-19 Thread Mikko Perttunen
From: Mikko Perttunen 

In anticipation of removal of the intr API, implement job tracking
using DMA fences instead. The main two things about this are
making cdma_update schedule the work since fence completion can
now be called from interrupt context, and some complication in
ensuring the callback is not running when we free the fence.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/cdma.c  | 14 +++--
 drivers/gpu/host1x/cdma.h  |  2 ++
 drivers/gpu/host1x/hw/channel_hw.c | 48 +-
 drivers/gpu/host1x/job.c   | 12 ++--
 include/linux/host1x.h |  6 ++--
 5 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 103fda055394..bc821b0ed908 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -490,6 +490,15 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma 
*cdma,
host1x_hw_cdma_resume(host1x, cdma, restart_addr);
 }
 
+static void cdma_update_work(struct work_struct *work)
+{
+   struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, 
update_work);
+
+   mutex_lock(>lock);
+   update_cdma_locked(cdma);
+   mutex_unlock(>lock);
+}
+
 /*
  * Create a cdma
  */
@@ -499,6 +508,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
 
mutex_init(>lock);
init_completion(>complete);
+   INIT_WORK(>update_work, cdma_update_work);
 
INIT_LIST_HEAD(>sync_queue);
 
@@ -679,7 +689,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
  */
 void host1x_cdma_update(struct host1x_cdma *cdma)
 {
-   mutex_lock(>lock);
-   update_cdma_locked(cdma);
-   mutex_unlock(>lock);
+   schedule_work(>update_work);
 }
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 12c4327c4df0..7fd8168af4f9 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct host1x_syncpt;
 struct host1x_userctx_timeout;
@@ -69,6 +70,7 @@ struct host1x_cdma {
struct buffer_timeout timeout;  /* channel's timeout state/wq */
bool running;
bool torndown;
+   struct work_struct update_work;
 };
 
 #define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 732abe0750ff..8a3119fc5a77 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -278,6 +278,14 @@ static void channel_program_cdma(struct host1x_job *job)
 #endif
 }
 
+static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb 
*cb)
+{
+   struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb);
+
+   /* Schedules CDMA update. */
+   host1x_cdma_update(>channel->cdma);
+}
+
 static int channel_submit(struct host1x_job *job)
 {
struct host1x_channel *ch = job->channel;
@@ -285,7 +293,6 @@ static int channel_submit(struct host1x_job *job)
u32 prev_max = 0;
u32 syncval;
int err;
-   struct host1x_waitlist *completed_waiter = NULL;
struct host1x *host = dev_get_drvdata(ch->dev->parent);
 
trace_host1x_channel_submit(dev_name(ch->dev),
@@ -298,14 +305,7 @@ static int channel_submit(struct host1x_job *job)
/* get submit lock */
err = mutex_lock_interruptible(>submitlock);
if (err)
-   goto error;
-
-   completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
-   if (!completed_waiter) {
-   mutex_unlock(>submitlock);
-   err = -ENOMEM;
-   goto error;
-   }
+   return err;
 
host1x_channel_set_streamid(ch);
host1x_enable_gather_filter(ch);
@@ -315,31 +315,37 @@ static int channel_submit(struct host1x_job *job)
err = host1x_cdma_begin(>cdma, job);
if (err) {
mutex_unlock(>submitlock);
-   goto error;
+   return err;
}
 
channel_program_cdma(job);
syncval = host1x_syncpt_read_max(sp);
 
+   /*
+* Create fence before submitting job to HW to avoid job completing
+* before the fence is set up.
+*/
+   job->fence = host1x_fence_create(sp, syncval);
+   if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) 
{
+   job->fence = NULL;
+   } else {
+   err = dma_fence_add_callback(job->fence, >fence_cb,
+job_complete_callback);
+   }
+
/* end CDMA submit & stash pinned hMems into sync queue */
host1x_cdma_end(>cdma, job);
 
trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
 
-   /* sc

[PATCH 4/4] gpu: host1x: External timeout/cancellation for fences

2023-01-19 Thread Mikko Perttunen
From: Mikko Perttunen 

Currently all fences have a 30 second timeout to ensure they are
cleaned up if the fence never completes otherwise. However, this
one size fits all solution doesn't actually fit in every case,
such as syncpoint waiting where we want to be able to have timeouts
longer than 30 seconds. As such, we want to be able to give control
over fence cancellation to the caller (and maybe eventually get rid
of the internal timeout altogether).

Here we add this cancellation mechanism by essentially adding a
function for entering the timeout path by function call, and changing
the syncpoint wait function to use it.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/submit.c |  2 +-
 drivers/gpu/host1x/fence.c | 40 --
 drivers/gpu/host1x/fence.h |  1 +
 drivers/gpu/host1x/hw/channel_hw.c |  2 +-
 drivers/gpu/host1x/syncpt.c|  4 ++-
 include/linux/host1x.h |  4 ++-
 6 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index 066f88564169..f4688fcafe93 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -654,7 +654,7 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, 
void *data,
args->syncpt.value = job->syncpt_end;
 
if (syncobj) {
-   struct dma_fence *fence = host1x_fence_create(job->syncpt, 
job->syncpt_end);
+   struct dma_fence *fence = host1x_fence_create(job->syncpt, 
job->syncpt_end, true);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
SUBMIT_ERR(context, "failed to create postfence: %d", 
err);
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index df5b56692d2c..139ad1afd935 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -37,8 +37,7 @@ static bool host1x_syncpt_fence_enable_signaling(struct 
dma_fence *f)
if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
return false;
 
-   /* One reference for interrupt path, one for timeout path. */
-   dma_fence_get(f);
+   /* Reference for interrupt path. */
dma_fence_get(f);
 
/*
@@ -46,11 +45,15 @@ static bool host1x_syncpt_fence_enable_signaling(struct 
dma_fence *f)
 * reference to any fences for which 'enable_signaling' has been
 * called (and that have not been signalled).
 *
-* We cannot (for now) normally guarantee that all fences get signalled.
-* As such, setup a timeout, so that long-lasting fences will get
-* reaped eventually.
+* We cannot currently always guarantee that all fences get signalled
+* or cancelled. As such, for such situations, set up a timeout, so
+* that long-lasting fences will get reaped eventually.
 */
-   schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
+   if (sf->timeout) {
+   /* Reference for timeout path. */
+   dma_fence_get(f);
+   schedule_delayed_work(&sf->timeout_work, 
msecs_to_jiffies(30000));
+   }
 
host1x_intr_add_fence_locked(sf->sp->host, sf);
 
@@ -80,7 +83,7 @@ void host1x_fence_signal(struct host1x_syncpt_fence *f)
return;
}
 
-   if (cancel_delayed_work(&f->timeout_work)) {
+   if (f->timeout && cancel_delayed_work(&f->timeout_work)) {
/*
 * We know that the timeout path will not be entered.
 * Safe to drop the timeout path's reference now.
@@ -99,8 +102,9 @@ static void do_fence_timeout(struct work_struct *work)
container_of(dwork, struct host1x_syncpt_fence, timeout_work);
 
if (atomic_xchg(&f->signaling, 1)) {
-   /* Already on interrupt path, drop timeout path reference. */
-   dma_fence_put(&f->base);
+   /* Already on interrupt path, drop timeout path reference if 
any. */
+   if (f->timeout)
+   dma_fence_put(&f->base);
return;
}
 
@@ -114,12 +118,12 @@ static void do_fence_timeout(struct work_struct *work)
 
dma_fence_set_error(&f->base, -ETIMEDOUT);
dma_fence_signal(&f->base);
-
-   /* Drop timeout path reference. */
-   dma_fence_put(&f->base);
+   if (f->timeout)
+   dma_fence_put(&f->base);
 }
 
-struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold,
+ bool timeout)
 {
struct host1x_syncpt_fence *fence;
 
@@ -129,6 +133,7 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt 
*sp, u32 threshold)
 
fence->sp = sp;
fence->threshold = threshold

[PATCH 3/4] gpu: host1x: Rewrite syncpoint interrupt handling

2023-01-19 Thread Mikko Perttunen
From: Mikko Perttunen 

Move from the old, complex intr handling code to a new implementation
based on dma_fences. While there is a fair bit of churn to get there,
the new implementation is much simpler and likely faster as well due
to allowing signaling directly from interrupt context.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/debug.c  |   7 +-
 drivers/gpu/host1x/dev.c|   4 +-
 drivers/gpu/host1x/dev.h|  10 +-
 drivers/gpu/host1x/fence.c  |  96 -
 drivers/gpu/host1x/fence.h  |  18 +-
 drivers/gpu/host1x/hw/intr_hw.c |  74 ++-
 drivers/gpu/host1x/intr.c   | 334 ++--
 drivers/gpu/host1x/intr.h   |  83 +---
 drivers/gpu/host1x/syncpt.h |   3 +-
 9 files changed, 149 insertions(+), 480 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 6649b04b7131..a18cc8d8caf5 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -77,6 +77,7 @@ static int show_channel(struct host1x_channel *ch, void 
*data, bool show_fifo)
 
 static void show_syncpts(struct host1x *m, struct output *o, bool show_all)
 {
+   unsigned long irqflags;
struct list_head *pos;
unsigned int i;
int err;
@@ -92,10 +93,10 @@ static void show_syncpts(struct host1x *m, struct output 
*o, bool show_all)
u32 min = host1x_syncpt_load(m->syncpt + i);
unsigned int waiters = 0;
 
-   spin_lock(&m->syncpt[i].intr.lock);
-   list_for_each(pos, &m->syncpt[i].intr.wait_head)
+   spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags);
+   list_for_each(pos, &m->syncpt[i].fences.list)
waiters++;
-   spin_unlock(&m->syncpt[i].intr.lock);
+   spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags);
 
if (!kref_read(&m->syncpt[i].ref))
continue;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index f31039aca03c..4872d183d860 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -516,7 +516,7 @@ static int host1x_probe(struct platform_device *pdev)
return PTR_ERR(host->regs);
}
 
-   syncpt_irq = platform_get_irq(pdev, 0);
+   host->syncpt_irq = platform_get_irq(pdev, 0);
if (syncpt_irq < 0)
return syncpt_irq;
 
@@ -578,7 +578,7 @@ static int host1x_probe(struct platform_device *pdev)
goto free_contexts;
}
 
-   err = host1x_intr_init(host, syncpt_irq);
+   err = host1x_intr_init(host);
if (err) {
dev_err(&pdev->dev, "failed to initialize interrupts\n");
goto deinit_syncpt;
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 920e5548cfbc..75de50fe03d0 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -74,8 +74,7 @@ struct host1x_syncpt_ops {
 };
 
 struct host1x_intr_ops {
-   int (*init_host_sync)(struct host1x *host, u32 cpm,
-   void (*syncpt_thresh_work)(struct work_struct *work));
+   int (*init_host_sync)(struct host1x *host, u32 cpm);
void (*set_syncpt_threshold)(
struct host1x *host, unsigned int id, u32 thresh);
void (*enable_syncpt_intr)(struct host1x *host, unsigned int id);
@@ -125,6 +124,7 @@ struct host1x {
void __iomem *regs;
void __iomem *hv_regs; /* hypervisor region */
void __iomem *common_regs;
+   int syncpt_irq;
struct host1x_syncpt *syncpt;
struct host1x_syncpt_base *bases;
struct device *dev;
@@ -138,7 +138,6 @@ struct host1x {
dma_addr_t iova_end;
 
struct mutex intr_mutex;
-   int intr_syncpt_irq;
 
const struct host1x_syncpt_ops *syncpt_op;
const struct host1x_intr_ops *intr_op;
@@ -216,10 +215,9 @@ static inline void 
host1x_hw_syncpt_enable_protection(struct host1x *host)
return host->syncpt_op->enable_protection(host);
 }
 
-static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
-   void (*syncpt_thresh_work)(struct work_struct *))
+static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm)
 {
-   return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work);
+   return host->intr_op->init_host_sync(host, cpm);
 }
 
 static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index df428bcbae69..df5b56692d2c 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -15,22 +15,6 @@
 #include "intr.h"
 #include "syncpt.h"
 
-static DEFINE_SPINLOCK(lock);
-
-struct host1x_syncpt_fence {
-   struct dma_fence base;
-
-   atomic_t signaling;
-
-   struct host1x_syncpt *sp;
-   u32 threshold;

[PATCH 1/4] gpu: host1x: Implement syncpoint wait using DMA fences

2023-01-19 Thread Mikko Perttunen
From: Mikko Perttunen 

In anticipation of removal of the intr API, move host1x_syncpt_wait
to use DMA fences instead. As of this patch, this means that waits
have a 30 second maximum timeout because of the implicit timeout
we have with fences, but that will be lifted in a follow-up patch.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/syncpt.c | 96 -
 1 file changed, 20 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index f87a8705f518..75f58ec2ae23 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -7,6 +7,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -209,17 +210,6 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp)
 }
 EXPORT_SYMBOL(host1x_syncpt_incr);
 
-/*
- * Updated sync point form hardware, and returns true if syncpoint is expired,
- * false if we may need to wait
- */
-static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
-{
-   host1x_hw_syncpt_load(sp->host, sp);
-
-   return host1x_syncpt_is_expired(sp, thresh);
-}
-
 /**
  * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
  * @sp: host1x syncpoint
@@ -230,10 +220,10 @@ static bool syncpt_load_min_is_expired(struct 
host1x_syncpt *sp, u32 thresh)
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
   u32 *value)
 {
-   DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-   void *ref;
-   struct host1x_waitlist *waiter;
-   int err = 0, check_count = 0;
+   struct dma_fence *fence;
+   long wait_err;
+
+   host1x_hw_syncpt_load(sp->host, sp);
 
if (value)
*value = host1x_syncpt_load(sp);
@@ -241,73 +231,27 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 
thresh, long timeout,
if (host1x_syncpt_is_expired(sp, thresh))
return 0;
 
-   if (!timeout) {
-   err = -EAGAIN;
-   goto done;
-   }
-
-   /* allocate a waiter */
-   waiter = kzalloc(sizeof(*waiter), GFP_KERNEL);
-   if (!waiter) {
-   err = -ENOMEM;
-   goto done;
-   }
-
-   /* schedule a wakeup when the syncpoint value is reached */
-   err = host1x_intr_add_action(sp->host, sp, thresh,
-HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
-, waiter, );
-   if (err)
-   goto done;
-
-   err = -EAGAIN;
-   /* Caller-specified timeout may be impractically low */
if (timeout < 0)
timeout = LONG_MAX;
+   else if (timeout == 0)
+   return -EAGAIN;
 
-   /* wait for the syncpoint, or timeout, or signal */
-   while (timeout) {
-   long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
-   int remain;
-
-   remain = wait_event_interruptible_timeout(wq,
-   syncpt_load_min_is_expired(sp, thresh),
-   check);
-   if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
-   if (value)
-   *value = host1x_syncpt_load(sp);
+   fence = host1x_fence_create(sp, thresh);
+   if (IS_ERR(fence))
+   return PTR_ERR(fence);
 
-   err = 0;
+   wait_err = dma_fence_wait_timeout(fence, true, timeout);
+   dma_fence_put(fence);
 
-   break;
-   }
-
-   if (remain < 0) {
-   err = remain;
-   break;
-   }
-
-   timeout -= check;
-
-   if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
-   dev_warn(sp->host->dev,
-   "%s: syncpoint id %u (%s) stuck waiting %d, 
timeout=%ld\n",
-current->comm, sp->id, sp->name,
-thresh, timeout);
-
-   host1x_debug_dump_syncpts(sp->host);
-
-   if (check_count == MAX_STUCK_CHECK_COUNT)
-   host1x_debug_dump(sp->host);
-
-   check_count++;
-   }
-   }
-
-   host1x_intr_put_ref(sp->host, sp->id, ref, true);
+   if (value)
+   *value = host1x_syncpt_load(sp);
 
-done:
-   return err;
+   if (wait_err == 0)
+   return -EAGAIN;
+   else if (wait_err < 0)
+   return wait_err;
+   else
+   return 0;
 }
 EXPORT_SYMBOL(host1x_syncpt_wait);
 
-- 
2.39.0



Re: [PATCH] drm/tegra: submit: No need for Null pointer check before kfree

2022-12-30 Thread Mikko Perttunen

On 12/30/22 12:01, Mikko Perttunen wrote:

On 12/30/22 11:15, Stanislaw Gruszka wrote:

On Wed, Dec 28, 2022 at 03:17:59PM +0200, Mikko Perttunen wrote:

On 12/28/22 15:08, Deepak R Varma wrote:

On Wed, Dec 28, 2022 at 02:28:54PM +0200, Mikko Perttunen wrote:

On 12/27/22 19:14, Deepak R Varma wrote:

kfree() & vfree() internally perform NULL check on the pointer handed
to it and take no action if it indeed is NULL. Hence there is no need
for a pre-check of the memory pointer before handing it to
kfree()/vfree().

Issue reported by ifnullfree.cocci Coccinelle semantic patch script.

Signed-off-by: Deepak R Varma 
---
    drivers/gpu/drm/tegra/submit.c | 4 ++--
    1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/submit.c 
b/drivers/gpu/drm/tegra/submit.c

index 066f88564169..06f836db99d0 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -680,8 +680,8 @@ int tegra_drm_ioctl_channel_submit(struct 
drm_device *drm, void *data,

    kfree(job_data->used_mappings);
    }

-    if (job_data)
-    kfree(job_data);
+    kfree(job_data);
+
    put_bo:
    gather_bo_put(>base);
    unlock:
--
2.34.1





It continues to be the case that I think this transform is bad. 
Same applies

to the host1x patch.


Hello Mikko,
Thank you for responding to the patch proposal. Could you please 
explain why is

this bad?

Regards,
./drv



Mikko





Hi,

it gets rid of visual hints on code paths indicating the possible 
liveness

of pointer variables. I.e., after the change, whether the pointer can be
NULL or not is more difficult to reason about locally, instead requiring
more global reasoning which is mentally more taxing.

Since C's type system doesn't help with tracking these kinds of 
things, I
believe it is important to have these kinds of local contextual cues 
to help

the programmer.


I agree with your point of view. But regarding this particular patch,
at least on code base I can see, after free_job_data label job_data
can not be NULL. So patch seems to be ok, but maybe changelog need to
be different

Regards
Stanislaw


It can be NULL; see:

     job->user_data = job_data;
     job->release = release_job;
     job->timeout = 1;

     /*
  * job_data is now part of job reference counting, so don't 
release

  * it from here.
  */
     job_data = NULL;

If we go into free_job_data after this code (which happens if there is 
no error, or if host1x_job_submit fails), job_data will be NULL.


The memory is instead released in the 'put_job' label; host1x_job_put 
ends up calling release_job, which does the kfree.


Well, the refcount is dropped -- it's not necessarily freed immediately, 
if the job is in execution.


Mikko



(Yes, it is rather complicated..)

Thanks,
Mikko




Re: [PATCH] drm/tegra: submit: No need for Null pointer check before kfree

2022-12-30 Thread Mikko Perttunen

On 12/30/22 11:15, Stanislaw Gruszka wrote:

On Wed, Dec 28, 2022 at 03:17:59PM +0200, Mikko Perttunen wrote:

On 12/28/22 15:08, Deepak R Varma wrote:

On Wed, Dec 28, 2022 at 02:28:54PM +0200, Mikko Perttunen wrote:

On 12/27/22 19:14, Deepak R Varma wrote:

kfree() & vfree() internally perform NULL check on the pointer handed
to it and take no action if it indeed is NULL. Hence there is no need
for a pre-check of the memory pointer before handing it to
kfree()/vfree().

Issue reported by ifnullfree.cocci Coccinelle semantic patch script.

Signed-off-by: Deepak R Varma 
---
drivers/gpu/drm/tegra/submit.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index 066f88564169..06f836db99d0 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -680,8 +680,8 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, 
void *data,
kfree(job_data->used_mappings);
}

-   if (job_data)
-   kfree(job_data);
+   kfree(job_data);
+
put_bo:
gather_bo_put(>base);
unlock:
--
2.34.1





It continues to be the case that I think this transform is bad. Same applies
to the host1x patch.


Hello Mikko,
Thank you for responding to the patch proposal. Could you please explain why is
this bad?

Regards,
./drv



Mikko





Hi,

it gets rid of visual hints on code paths indicating the possible liveness
of pointer variables. I.e., after the change, whether the pointer can be
NULL or not is more difficult to reason about locally, instead requiring
more global reasoning which is mentally more taxing.

Since C's type system doesn't help with tracking these kinds of things, I
believe it is important to have these kinds of local contextual cues to help
the programmer.


I agree with your point of view. But regarding this particular patch,
at least on code base I can see, after free_job_data label job_data
can not be NULL. So patch seems to be ok, but maybe changelog need to
be different

Regards
Stanislaw


It can be NULL; see:

job->user_data = job_data;
job->release = release_job;
job->timeout = 1;

/*
 * job_data is now part of job reference counting, so don't release
 * it from here.
 */
job_data = NULL;

If we go into free_job_data after this code (which happens if there is 
no error, or if host1x_job_submit fails), job_data will be NULL.


The memory is instead released in the 'put_job' label; host1x_job_put 
ends up calling release_job, which does the kfree.


(Yes, it is rather complicated..)

Thanks,
Mikko


Re: [PATCH] drm/tegra: submit: No need for Null pointer check before kfree

2022-12-28 Thread Mikko Perttunen

On 12/28/22 15:34, Deepak R Varma wrote:

On Wed, Dec 28, 2022 at 03:17:59PM +0200, Mikko Perttunen wrote:

On 12/28/22 15:08, Deepak R Varma wrote:

On Wed, Dec 28, 2022 at 02:28:54PM +0200, Mikko Perttunen wrote:

On 12/27/22 19:14, Deepak R Varma wrote:

kfree() & vfree() internally perform NULL check on the pointer handed
to it and take no action if it indeed is NULL. Hence there is no need
for a pre-check of the memory pointer before handing it to
kfree()/vfree().

Issue reported by ifnullfree.cocci Coccinelle semantic patch script.

Signed-off-by: Deepak R Varma 
---
drivers/gpu/drm/tegra/submit.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index 066f88564169..06f836db99d0 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -680,8 +680,8 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, 
void *data,
kfree(job_data->used_mappings);
}

-   if (job_data)
-   kfree(job_data);
+   kfree(job_data);
+
put_bo:
gather_bo_put(>base);
unlock:
--
2.34.1





It continues to be the case that I think this transform is bad. Same applies
to the host1x patch.


Hello Mikko,
Thank you for responding to the patch proposal. Could you please explain why is
this bad?

Regards,
./drv



Mikko





Hi,

it gets rid of visual hints on code paths indicating the possible liveness
of pointer variables. I.e., after the change, whether the pointer can be
NULL or not is more difficult to reason about locally, instead requiring
more global reasoning which is mentally more taxing.

Since C's type system doesn't help with tracking these kinds of things, I
believe it is important to have these kinds of local contextual cues to help
the programmer.


Hello Mikko,
That really helps. Thank you for the detailed explanation. I do have an extended
question though. In this context, when we are ready to release the memory, how
is it useful to know if it is NULL or not this late in the flow when the scope
is about to end?


In the current code it doesn't matter, but if someone went to change 
this code (for example to add another release step), and we just had 
'kfree(job_data)', they would have to remember that kfree works with 
NULL pointers, and would have to go looking elsewhere in the code to see 
if it is in fact possible to assume that job_data cannot be NULL here, 
or not. If they forget about kfree working with NULL pointers, which 
wouldn't be that surprising since it is almost always only called with 
non-NULL pointers, they might instead introduce a bug.


In this particular instance it's probably not that bad since immediately 
above we have another 'if' block that checks if job_data is NULL, which 
serves as a hint to the programmer; however, as a general principle it 
stands that having the NULL check here makes it obvious to any reading 
programmer that any changes they make have to consider if the 
pointer is NULL or not.




Thanks again!
./drv



Thanks!
Mikko







Mikko







Re: [PATCH] drm/tegra: submit: No need for Null pointer check before kfree

2022-12-28 Thread Mikko Perttunen

On 12/28/22 15:08, Deepak R Varma wrote:

On Wed, Dec 28, 2022 at 02:28:54PM +0200, Mikko Perttunen wrote:

On 12/27/22 19:14, Deepak R Varma wrote:

kfree() & vfree() internally perform NULL check on the pointer handed
to it and take no action if it indeed is NULL. Hence there is no need
for a pre-check of the memory pointer before handing it to
kfree()/vfree().

Issue reported by ifnullfree.cocci Coccinelle semantic patch script.

Signed-off-by: Deepak R Varma 
---
   drivers/gpu/drm/tegra/submit.c | 4 ++--
   1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index 066f88564169..06f836db99d0 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -680,8 +680,8 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, 
void *data,
kfree(job_data->used_mappings);
}

-   if (job_data)
-   kfree(job_data);
+   kfree(job_data);
+
   put_bo:
gather_bo_put(>base);
   unlock:
--
2.34.1





It continues to be the case that I think this transform is bad. Same applies
to the host1x patch.


Hello Mikko,
Thank you for responding to the patch proposal. Could you please explain why is
this bad?

Regards,
./drv



Mikko





Hi,

it gets rid of visual hints on code paths indicating the possible 
liveness of pointer variables. I.e., after the change, whether the 
pointer can be NULL or not is more difficult to reason about locally, 
instead requiring more global reasoning which is mentally more taxing.


Since C's type system doesn't help with tracking these kinds of things, 
I believe it is important to have these kinds of local contextual cues 
to help the programmer.


Mikko


Re: [PATCH] drm/tegra: submit: No need for Null pointer check before kfree

2022-12-28 Thread Mikko Perttunen

On 12/27/22 19:14, Deepak R Varma wrote:

kfree() & vfree() internally perform NULL check on the pointer handed
to it and take no action if it indeed is NULL. Hence there is no need
for a pre-check of the memory pointer before handing it to
kfree()/vfree().

Issue reported by ifnullfree.cocci Coccinelle semantic patch script.

Signed-off-by: Deepak R Varma 
---
  drivers/gpu/drm/tegra/submit.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index 066f88564169..06f836db99d0 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -680,8 +680,8 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, 
void *data,
kfree(job_data->used_mappings);
}

-   if (job_data)
-   kfree(job_data);
+   kfree(job_data);
+
  put_bo:
gather_bo_put(>base);
  unlock:
--
2.34.1





It continues to be the case that I think this transform is bad. Same 
applies to the host1x patch.


Mikko


Re: [PATCH] gpu: host1x: Remove redundant null checks before kfree

2022-12-01 Thread Mikko Perttunen

On 12/1/22 03:55, zys.zlj...@gmail.com wrote:

From: Yushan Zhou 

Fix the following coccicheck warning:
./drivers/gpu/host1x/fence.c:97:2-7: WARNING:
NULL check before some freeing functions is not needed.

Signed-off-by: Yushan Zhou 
---
  drivers/gpu/host1x/fence.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index ecab72882192..05b36bfc8b74 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -93,8 +93,7 @@ static void host1x_syncpt_fence_release(struct dma_fence *f)
  {
struct host1x_syncpt_fence *sf = to_host1x_fence(f);
  
-	if (sf->waiter)

-   kfree(sf->waiter);
+   kfree(sf->waiter);
  
  	dma_fence_free(f);

  }


I disagree with this coccinelle rule; I think it obfuscates from the 
reader the fact that the pointer could be NULL.


Mikko


Re: [PATCH v3 1/8] memory: tegra: Add API for retrieving carveout bounds

2022-09-21 Thread Mikko Perttunen

On 9/21/22 21:37, Krzysztof Kozlowski wrote:

On 20/09/2022 10:11, Mikko Perttunen wrote:

From: Mikko Perttunen 

On Tegra234 NVDEC firmware is loaded from a secure carveout, where it
has been loaded by a bootloader. When booting NVDEC, we need to tell it
the address of this firmware, which we can determine by checking the
starting address of the carveout. As such, add an MC API to query the
bounds of carveouts, and add related information on Tegra234.

Signed-off-by: Mikko Perttunen 



Acked-by: Krzysztof Kozlowski 


Best regards,
Krzysztof



Thanks!

Mikko


Re: [PATCH v3 0/8] Support for NVDEC on Tegra234

2022-09-21 Thread Mikko Perttunen

On 9/21/22 10:26, Krzysztof Kozlowski wrote:

On 20/09/2022 10:11, Mikko Perttunen wrote:

From: Mikko Perttunen 

v3:
* Updated patch 3 based on comments

v2:
* Updated patches 1,3 based on comments
* Added Acked-by to patch 2

Original message:

Hi all,

this series adds support for the HW video decoder, NVDEC,
on Tegra234 (Orin). The main change is a switch from Falcon
to RISC-V for the internal microcontroller, which brings along
a change in how the engine is booted. Otherwise it is backwards
compatible with earlier versions.


I asked you to describe the dependencies and patch merging strategy.
It's still not here, so I assume there are no and I am taking patches
relevant to me.

Best regards,
Krzysztof


Sorry, I described it in the earlier email and forgot to add it to the 
cover letter..


Patch 8 does depend on patch 1 so it would be better to take the memory 
patch with it, or however works best from maintainership point of view 
(not my expertise).


thanks,
Mikko


[PATCH v3 6/8] drm/tegra: nvdec: Support multiple clocks

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

NVDEC on Tegra234 requires multiple clocks. Add support for that.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/nvdec.c | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 276fe0472730..05af4d107421 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2015-2021, NVIDIA Corporation.
+ * Copyright (c) 2015-2022, NVIDIA Corporation.
  */
 
 #include 
@@ -28,6 +28,7 @@ struct nvdec_config {
const char *firmware;
unsigned int version;
bool supports_sid;
+   bool has_extra_clocks;
 };
 
 struct nvdec {
@@ -37,7 +38,8 @@ struct nvdec {
struct tegra_drm_client client;
struct host1x_channel *channel;
struct device *dev;
-   struct clk *clk;
+   struct clk_bulk_data clks[3];
+   unsigned int num_clks;
 
/* Platform configuration */
const struct nvdec_config *config;
@@ -258,7 +260,7 @@ static __maybe_unused int nvdec_runtime_resume(struct 
device *dev)
struct nvdec *nvdec = dev_get_drvdata(dev);
int err;
 
-   err = clk_prepare_enable(nvdec->clk);
+   err = clk_bulk_prepare_enable(nvdec->num_clks, nvdec->clks);
if (err < 0)
return err;
 
@@ -275,7 +277,7 @@ static __maybe_unused int nvdec_runtime_resume(struct 
device *dev)
return 0;
 
 disable:
-   clk_disable_unprepare(nvdec->clk);
+   clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
return err;
 }
 
@@ -285,7 +287,7 @@ static __maybe_unused int nvdec_runtime_suspend(struct 
device *dev)
 
host1x_channel_stop(nvdec->channel);
 
-   clk_disable_unprepare(nvdec->clk);
+   clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
 
return 0;
 }
@@ -383,13 +385,22 @@ static int nvdec_probe(struct platform_device *pdev)
if (IS_ERR(nvdec->regs))
return PTR_ERR(nvdec->regs);
 
-   nvdec->clk = devm_clk_get(dev, NULL);
-   if (IS_ERR(nvdec->clk)) {
-   dev_err(&pdev->dev, "failed to get clock\n");
-   return PTR_ERR(nvdec->clk);
+   nvdec->clks[0].id = "nvdec";
+   nvdec->num_clks = 1;
+
+   if (nvdec->config->has_extra_clocks) {
+   nvdec->num_clks = 3;
+   nvdec->clks[1].id = "fuse";
+   nvdec->clks[2].id = "tsec_pka";
+   }
+
+   err = devm_clk_bulk_get(dev, nvdec->num_clks, nvdec->clks);
+   if (err) {
+   dev_err(&pdev->dev, "failed to get clock(s)\n");
+   return err;
}
 
-   err = clk_set_rate(nvdec->clk, ULONG_MAX);
+   err = clk_set_rate(nvdec->clks[0].clk, ULONG_MAX);
if (err < 0) {
dev_err(&pdev->dev, "failed to set clock rate\n");
return err;
-- 
2.37.0



[PATCH v3 1/8] memory: tegra: Add API for retrieving carveout bounds

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

On Tegra234 NVDEC firmware is loaded from a secure carveout, where it
has been loaded by a bootloader. When booting NVDEC, we need to tell it
the address of this firmware, which we can determine by checking the
starting address of the carveout. As such, add an MC API to query the
bounds of carveouts, and add related information on Tegra234.

Signed-off-by: Mikko Perttunen 
---
v2:
- Add check for 64-bit phys_addr_t. In practice phys_addr_t
  is always 64 bits where this runs, but it avoids warnings in
  compile test.
---
 drivers/memory/tegra/mc.c   | 25 +
 drivers/memory/tegra/tegra234.c |  5 +
 include/soc/tegra/mc.h  | 11 +++
 3 files changed, 41 insertions(+)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 2f7a58a9df1a..592907546ee6 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -107,6 +107,31 @@ int tegra_mc_probe_device(struct tegra_mc *mc, struct 
device *dev)
 }
 EXPORT_SYMBOL_GPL(tegra_mc_probe_device);
 
+int tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size)
+{
+   u32 offset;
+
+   if (id < 1 || id >= mc->soc->num_carveouts)
+   return -EINVAL;
+
+   if (id < 6)
+   offset = 0xc0c + 0x50 * (id - 1);
+   else
+   offset = 0x2004 + 0x50 * (id - 6);
+
+   *base = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 0x0);
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+   *base |= (phys_addr_t)mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 
0x4) << 32;
+#endif
+
+   if (size)
+   *size = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 0x8) << 
17;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(tegra_mc_get_carveout_info);
+
 static int tegra_mc_block_dma_common(struct tegra_mc *mc,
 const struct tegra_mc_reset *rst)
 {
diff --git a/drivers/memory/tegra/tegra234.c b/drivers/memory/tegra/tegra234.c
index a9e8fd99730f..74d291d66366 100644
--- a/drivers/memory/tegra/tegra234.c
+++ b/drivers/memory/tegra/tegra234.c
@@ -187,4 +187,9 @@ const struct tegra_mc_soc tegra234_mc_soc = {
.ops = _mc_ops,
.ch_intmask = 0xff00,
.global_intstatus_channel_shift = 8,
+   /*
+* Additionally, there are lite carveouts but those are not currently
+* supported.
+*/
+   .num_carveouts = 32,
 };
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index 47ce6d434427..51a2263e1bc5 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -193,6 +193,8 @@ struct tegra_mc_soc {
unsigned int num_address_bits;
unsigned int atom_size;
 
+   unsigned int num_carveouts;
+
u16 client_id_mask;
u8 num_channels;
 
@@ -244,6 +246,8 @@ unsigned int tegra_mc_get_emem_device_count(struct tegra_mc 
*mc);
 #ifdef CONFIG_TEGRA_MC
 struct tegra_mc *devm_tegra_memory_controller_get(struct device *dev);
 int tegra_mc_probe_device(struct tegra_mc *mc, struct device *dev);
+int tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size);
 #else
 static inline struct tegra_mc *
 devm_tegra_memory_controller_get(struct device *dev)
@@ -256,6 +260,13 @@ tegra_mc_probe_device(struct tegra_mc *mc, struct device 
*dev)
 {
return -ENODEV;
 }
+
+/*
+ * Stub used when CONFIG_TEGRA_MC is not enabled: always returns -ENODEV and
+ * leaves @base and @size untouched.
+ */
+static inline int
+tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size)
+{
+   return -ENODEV;
+}
 #endif
 
 #endif /* __SOC_TEGRA_MC_H__ */
-- 
2.37.0



[PATCH v3 7/8] drm/tegra: Add code for booting RISC-V based engines

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

Add helper code for booting RISC-V based engines where firmware is
located in a carveout.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/Makefile |   3 +-
 drivers/gpu/drm/tegra/riscv.c  | 106 +
 drivers/gpu/drm/tegra/riscv.h  |  30 ++
 3 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tegra/riscv.c
 create mode 100644 drivers/gpu/drm/tegra/riscv.h

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index df6cc986aeba..bb0d2c144b55 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -24,7 +24,8 @@ tegra-drm-y := \
gr3d.o \
falcon.o \
vic.o \
-   nvdec.o
+   nvdec.o \
+   riscv.o
 
 tegra-drm-y += trace.o
 
diff --git a/drivers/gpu/drm/tegra/riscv.c b/drivers/gpu/drm/tegra/riscv.c
new file mode 100644
index 000000000000..6580416408f8
--- /dev/null
+++ b/drivers/gpu/drm/tegra/riscv.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, NVIDIA Corporation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "riscv.h"
+
+#define RISCV_CPUCTL   0x4388
+#define RISCV_CPUCTL_STARTCPU_TRUE (1 << 0)
+#define RISCV_BR_RETCODE   0x465c
+#define RISCV_BR_RETCODE_RESULT_V(x)   ((x) & 0x3)
+#define RISCV_BR_RETCODE_RESULT_PASS_V 3
+#define RISCV_BCR_CTRL 0x4668
+#define RISCV_BCR_CTRL_CORE_SELECT_RISCV   (1 << 4)
+#define RISCV_BCR_DMACFG   0x466c
+#define RISCV_BCR_DMACFG_TARGET_LOCAL_FB   (0 << 0)
+#define RISCV_BCR_DMACFG_LOCK_LOCKED   (1 << 31)
+#define RISCV_BCR_DMAADDR_PKCPARAM_LO  0x4670
+#define RISCV_BCR_DMAADDR_PKCPARAM_HI  0x4674
+#define RISCV_BCR_DMAADDR_FMCCODE_LO   0x4678
+#define RISCV_BCR_DMAADDR_FMCCODE_HI   0x467c
+#define RISCV_BCR_DMAADDR_FMCDATA_LO   0x4680
+#define RISCV_BCR_DMAADDR_FMCDATA_HI   0x4684
+#define RISCV_BCR_DMACFG_SEC   0x4694
+#define RISCV_BCR_DMACFG_SEC_GSCID(v)  ((v) << 16)
+
+/* Write @value to the engine register located at @offset from riscv->regs. */
+static void riscv_writel(struct tegra_drm_riscv *riscv, u32 value, u32 offset)
+{
+   writel(value, riscv->regs + offset);
+}
+
+/**
+ * tegra_drm_riscv_read_descriptors() - read firmware layout offsets from DT
+ * @riscv: engine state; riscv->dev->of_node supplies the properties and
+ *         riscv->bl_desc / riscv->os_desc receive the values
+ *
+ * Reads the six "nvidia,{bl,os}-{manifest,code,data}-offset" u32 properties.
+ *
+ * Return: 0 on success, the of_property_read_u32() error if a property cannot
+ * be read, or -EINVAL if all six offsets are zero.
+ */
+int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv)
+{
+	struct tegra_drm_riscv_descriptor *bl = &riscv->bl_desc;
+	struct tegra_drm_riscv_descriptor *os = &riscv->os_desc;
+	const struct device_node *np = riscv->dev->of_node;
+	int err;
+
+/*
+ * Read one u32 property into @location; on failure log the property name and
+ * return from the enclosing function. Wrapped in do/while (0) so that each use
+ * expands to exactly one statement.
+ */
+#define READ_PROP(name, location) \
+	do { \
+		err = of_property_read_u32(np, name, location); \
+		if (err) { \
+			dev_err(riscv->dev, "failed to read " name ": %d\n", err); \
+			return err; \
+		} \
+	} while (0)
+
+	READ_PROP("nvidia,bl-manifest-offset", &bl->manifest_offset);
+	READ_PROP("nvidia,bl-code-offset", &bl->code_offset);
+	READ_PROP("nvidia,bl-data-offset", &bl->data_offset);
+	READ_PROP("nvidia,os-manifest-offset", &os->manifest_offset);
+	READ_PROP("nvidia,os-code-offset", &os->code_offset);
+	READ_PROP("nvidia,os-data-offset", &os->data_offset);
+#undef READ_PROP
+
+	/* Reject the case where every offset is zero. */
+	if (bl->manifest_offset == 0 && bl->code_offset == 0 &&
+	    bl->data_offset == 0 && os->manifest_offset == 0 &&
+	    os->code_offset == 0 && os->data_offset == 0) {
+		dev_err(riscv->dev, "descriptors not available\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t 
image_address,
+u32 gscid, const struct 
tegra_drm_riscv_descriptor *desc)
+{
+   phys_addr_t addr;
+   int err;
+   u32 val;
+
+   riscv_writel(riscv, RISCV_BCR_CTRL_CORE_SELECT_RISCV, RISCV_BCR_CTRL);
+
+   addr = image_address + desc->manifest_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_PKCPARAM_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_PKCPARAM_HI);
+
+   addr = image_address + desc->code_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCCODE_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCCODE_HI);
+
+   addr = image_address + desc->data_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCDATA_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCDATA_HI);
+
+   riscv_writel(riscv

[PATCH v3 2/8] dt-bindings: Add headers for NVDEC on Tegra234

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

Add clock, memory controller, powergate and reset dt-binding headers
necessary for NVDEC.

Signed-off-by: Mikko Perttunen 
Acked-by: Krzysztof Kozlowski 
---
 include/dt-bindings/clock/tegra234-clock.h | 4 
 include/dt-bindings/memory/tegra234-mc.h   | 3 +++
 include/dt-bindings/power/tegra234-powergate.h | 1 +
 include/dt-bindings/reset/tegra234-reset.h | 1 +
 4 files changed, 9 insertions(+)

diff --git a/include/dt-bindings/clock/tegra234-clock.h 
b/include/dt-bindings/clock/tegra234-clock.h
index 173364a93381..25b4a3fb4588 100644
--- a/include/dt-bindings/clock/tegra234-clock.h
+++ b/include/dt-bindings/clock/tegra234-clock.h
@@ -82,6 +82,8 @@
 #define TEGRA234_CLK_I2S6  66U
 /** @brief clock recovered from I2S6 input */
 #define TEGRA234_CLK_I2S6_SYNC_INPUT   67U
+/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_NVDEC */
+#define TEGRA234_CLK_NVDEC  83U
 /** PLL controlled by CLK_RST_CONTROLLER_PLLA_BASE for use by audio clocks */
 #define TEGRA234_CLK_PLLA  93U
 /** @brief PLLP clk output */
@@ -130,6 +132,8 @@
 #define TEGRA234_CLK_SYNC_I2S5 149U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S6 
*/
 #define TEGRA234_CLK_SYNC_I2S6 150U
+/** output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PKA */
+#define TEGRA234_CLK_TSEC_PKA   154U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */
 #define TEGRA234_CLK_UARTA 155U
 /** @brief output of gate CLK_ENB_PEX1_CORE_6 */
diff --git a/include/dt-bindings/memory/tegra234-mc.h 
b/include/dt-bindings/memory/tegra234-mc.h
index 62987b47ce81..75f0bd30d365 100644
--- a/include/dt-bindings/memory/tegra234-mc.h
+++ b/include/dt-bindings/memory/tegra234-mc.h
@@ -32,6 +32,7 @@
 #define TEGRA234_SID_PCIE100x0b
 #define TEGRA234_SID_BPMP  0x10
 #define TEGRA234_SID_HOST1X0x27
+#define TEGRA234_SID_NVDEC 0x29
 #define TEGRA234_SID_VIC   0x34
 
 /*
@@ -91,6 +92,8 @@
 #define TEGRA234_MEMORY_CLIENT_SDMMCWAB 0x67
 #define TEGRA234_MEMORY_CLIENT_VICSRD 0x6c
 #define TEGRA234_MEMORY_CLIENT_VICSWR 0x6d
+#define TEGRA234_MEMORY_CLIENT_NVDECSRD 0x78
+#define TEGRA234_MEMORY_CLIENT_NVDECSWR 0x79
 /* BPMP read client */
 #define TEGRA234_MEMORY_CLIENT_BPMPR 0x93
 /* BPMP write client */
diff --git a/include/dt-bindings/power/tegra234-powergate.h 
b/include/dt-bindings/power/tegra234-powergate.h
index ae9286cef85c..e5dc1e00be95 100644
--- a/include/dt-bindings/power/tegra234-powergate.h
+++ b/include/dt-bindings/power/tegra234-powergate.h
@@ -19,6 +19,7 @@
 #define TEGRA234_POWER_DOMAIN_MGBEB18U
 #define TEGRA234_POWER_DOMAIN_MGBEC19U
 #define TEGRA234_POWER_DOMAIN_MGBED20U
+#define TEGRA234_POWER_DOMAIN_NVDEC 23U
 #define TEGRA234_POWER_DOMAIN_VIC  29U
 
 #endif
diff --git a/include/dt-bindings/reset/tegra234-reset.h 
b/include/dt-bindings/reset/tegra234-reset.h
index d48d22b2bc7f..17163019316c 100644
--- a/include/dt-bindings/reset/tegra234-reset.h
+++ b/include/dt-bindings/reset/tegra234-reset.h
@@ -30,6 +30,7 @@
 #define TEGRA234_RESET_I2C733U
 #define TEGRA234_RESET_I2C834U
 #define TEGRA234_RESET_I2C935U
+#define TEGRA234_RESET_NVDEC44U
 #define TEGRA234_RESET_MGBE0_PCS   45U
 #define TEGRA234_RESET_MGBE0_MAC   46U
 #define TEGRA234_RESET_MGBE1_PCS   49U
-- 
2.37.0



[PATCH v3 8/8] drm/tegra: Add Tegra234 support to NVDEC driver

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

Add support for the Tegra234 version of NVDEC to the NVDEC driver.
This version sports a RISC-V controller and requires a few additional
clocks. After firmware has been loaded, the behavior is, however,
backwards compatible.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c   |   1 +
 drivers/gpu/drm/tegra/nvdec.c | 140 ++
 2 files changed, 126 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 6748ec1e0005..a014f11e9edb 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1382,6 +1382,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
{ .compatible = "nvidia,tegra194-vic", },
{ .compatible = "nvidia,tegra194-nvdec", },
{ .compatible = "nvidia,tegra234-vic", },
+   { .compatible = "nvidia,tegra234-nvdec", },
{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 05af4d107421..10fd21517281 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -16,18 +17,21 @@
 #include 
 #include 
 
-#include 
+#include 
 
 #include "drm.h"
 #include "falcon.h"
+#include "riscv.h"
 #include "vic.h"
 
+#define NVDEC_FALCON_DEBUGINFO 0x1094
 #define NVDEC_TFBIF_TRANSCFG   0x2c44
 
 struct nvdec_config {
const char *firmware;
unsigned int version;
bool supports_sid;
+   bool has_riscv;
bool has_extra_clocks;
 };
 
@@ -40,9 +44,14 @@ struct nvdec {
struct device *dev;
struct clk_bulk_data clks[3];
unsigned int num_clks;
+   struct reset_control *reset;
 
/* Platform configuration */
const struct nvdec_config *config;
+
+   /* RISC-V specific data */
+   struct tegra_drm_riscv riscv;
+   phys_addr_t carveout_base;
 };
 
 static inline struct nvdec *to_nvdec(struct tegra_drm_client *client)
@@ -56,7 +65,7 @@ static inline void nvdec_writel(struct nvdec *nvdec, u32 
value,
writel(value, nvdec->regs + offset);
 }
 
-static int nvdec_boot(struct nvdec *nvdec)
+static int nvdec_boot_falcon(struct nvdec *nvdec)
 {
 #ifdef CONFIG_IOMMU_API
struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev);
@@ -92,6 +101,64 @@ static int nvdec_boot(struct nvdec *nvdec)
return 0;
 }
 
+/*
+ * Poll NVDEC_FALCON_DEBUGINFO until it reads zero, sleeping 10 us between
+ * reads and giving up after 100 ms. @phase is only used to label the error
+ * message.
+ *
+ * Returns 0 once the register reads zero, or the readl_poll_timeout() error
+ * if it does not do so in time.
+ */
+static int nvdec_wait_debuginfo(struct nvdec *nvdec, const char *phase)
+{
+	int err;
+	u32 val;
+
+	err = readl_poll_timeout(nvdec->regs + NVDEC_FALCON_DEBUGINFO, val, val == 0x0, 10, 100000);
+	if (err) {
+		dev_err(nvdec->dev, "failed to boot %s, debuginfo=0x%x\n", phase, val);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Boot the NVDEC RISC-V core in two stages from the image at
+ * nvdec->carveout_base: first the stage described by riscv.bl_desc, then,
+ * after a reset, the stage described by riscv.os_desc. The reset control is
+ * acquired for the whole sequence and released on every path after a
+ * successful acquire.
+ *
+ * Returns 0 on success or the error of the first step that failed.
+ */
+static int nvdec_boot_riscv(struct nvdec *nvdec)
+{
+	int err;
+
+	err = reset_control_acquire(nvdec->reset);
+	if (err)
+		return err;
+
+	/*
+	 * Seed DEBUGINFO with a non-zero marker; nvdec_wait_debuginfo() then
+	 * polls for it to read zero (presumably cleared by the booted stage).
+	 */
+	nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+	err = tegra_drm_riscv_boot_bootrom(&nvdec->riscv, nvdec->carveout_base, 1,
+					    &nvdec->riscv.bl_desc);
+	if (err) {
+		dev_err(nvdec->dev, "failed to execute bootloader\n");
+		goto release_reset;
+	}
+
+	err = nvdec_wait_debuginfo(nvdec, "bootloader");
+	if (err)
+		goto release_reset;
+
+	/* Reset the engine between the two stages. */
+	err = reset_control_reset(nvdec->reset);
+	if (err)
+		goto release_reset;
+
+	nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+	err = tegra_drm_riscv_boot_bootrom(&nvdec->riscv, nvdec->carveout_base, 1,
+					    &nvdec->riscv.os_desc);
+	if (err) {
+		dev_err(nvdec->dev, "failed to execute firmware\n");
+		goto release_reset;
+	}
+
+	/* Success and failure both fall through to the release below. */
+	err = nvdec_wait_debuginfo(nvdec, "firmware");
+
+release_reset:
+	reset_control_release(nvdec->reset);
+
+	return err;
+}
+
 static int nvdec_init(struct host1x_client *client)
 {
struct tegra_drm_client *drm = host1x_to_drm_client(client);
@@ -191,7 +258,7 @@ static const struct host1x_client_ops nvdec_client_ops = {
.exit = nvdec_exit,
 };
 
-static int nvdec_load_firmware(struct nvdec *nvdec)
+static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
 {
struct host1x_client *client = >client.base;
struct tegra_drm *tegra = nvdec->client.drm;
@@ -254,7 +321,6 @@ static int nvdec_load_firmware(struct nvdec *nvdec)
return err;
 }
 
-
 static __maybe_unused int nvdec_runtime_resume(struct device *dev)
 {
struct nvdec *nvdec 

[PATCH v3 3/8] dt-bindings: Add bindings for Tegra234 NVDEC

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

Update NVDEC bindings for Tegra234. This new engine version only has
two memory clients, but now requires three clocks, and as a bigger
change the engine loads firmware from a secure carveout configured by
the bootloader.

For the latter, we need to add a phandle to the memory controller
to query the location of this carveout, and several other properties
containing offsets into the firmware inside the carveout. This
carveout is not accessible by the CPU, but is needed by NVDEC,
so we need this information to be relayed from the bootloader.

As the binding was getting large with many conditional properties,
also split the Tegra234 version out into a separate file.

Signed-off-by: Mikko Perttunen 
---
v3:
- Adjusted descriptions for firmware-related DT properties
  as requested.
- Small update to commit message.
v2:
- Split out into separate file to avoid complexity with
  conditionals etc.
---
 .../gpu/host1x/nvidia,tegra234-nvdec.yaml | 156 ++
 1 file changed, 156 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml

diff --git 
a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml 
b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
new file mode 100644
index 000000000000..7cc2dd525a96
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
@@ -0,0 +1,156 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra234-nvdec.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Device tree binding for NVIDIA Tegra234 NVDEC
+
+description: |
+  NVDEC is the hardware video decoder present on NVIDIA Tegra210
+  and newer chips. It is located on the Host1x bus and typically
+  programmed through Host1x channels.
+
+maintainers:
+  - Thierry Reding 
+  - Mikko Perttunen 
+
+properties:
+  $nodename:
+pattern: "^nvdec@[0-9a-f]*$"
+
+  compatible:
+enum:
+  - nvidia,tegra234-nvdec
+
+  reg:
+maxItems: 1
+
+  clocks:
+maxItems: 3
+
+  clock-names:
+items:
+  - const: nvdec
+  - const: fuse
+  - const: tsec_pka
+
+  resets:
+maxItems: 1
+
+  reset-names:
+items:
+  - const: nvdec
+
+  power-domains:
+maxItems: 1
+
+  iommus:
+maxItems: 1
+
+  dma-coherent: true
+
+  interconnects:
+items:
+  - description: DMA read memory client
+  - description: DMA write memory client
+
+  interconnect-names:
+items:
+  - const: dma-mem
+  - const: write
+
+  nvidia,memory-controller:
+$ref: /schemas/types.yaml#/definitions/phandle
+description:
+  phandle to the memory controller for determining information for the NVDEC
+  firmware secure carveout. This carveout is configured by the bootloader and
+  not accessible to CPU.
+
+  nvidia,bl-manifest-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader manifest from beginning of firmware that was configured by
+  the bootloader.
+
+  nvidia,bl-code-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader code section from beginning of firmware that was configured by
+  the bootloader.
+
+  nvidia,bl-data-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader data section from beginning of firmware that was configured by
+  the bootloader.
+
+  nvidia,os-manifest-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system manifest from beginning of firmware that was configured by
+  the bootloader.
+
+  nvidia,os-code-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system code section from beginning of firmware that was configured by
+  the bootloader.
+
+  nvidia,os-data-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system data section from beginning of firmware that was configured
+  by the bootloader.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - power-domains
+  - nvidia,memory-controller
+  - nvidia,bl-manifest-offset
+  - nvidia,bl-code-offset
+  - nvidia,bl-data-offset
+  - nvidia,os-manifest-offset
+  - nvidia,os-code-offset
+  - nvidia,os-data-offset
+
+additionalProperties: false
+
+examples:
+  - |
+#include 
+#include 
+#include 
+#include 
+
+nvdec@1548 {
+compatible = "nvidia,tegra234-nvdec";
+reg = <0x1548 0x0004>;
+clocks = < TEGRA234_CLK_NVDEC>,
+ < TEGRA234_CLK_FUSE>,
+ < TEGRA234_CLK_TSEC_PKA>;
+clock-names = "nvdec", 

[PATCH v3 5/8] gpu: host1x: Add stream ID register data for NVDEC on Tegra234

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

Add entries for NVDEC to the Tegra234 SID table.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 0cd3f97e7e49..d6b4614f968f 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -225,6 +225,18 @@ static const struct host1x_sid_entry tegra234_sid_table[] 
= {
.offset = 0x34,
.limit = 0x34
},
+   {
+   /* NVDEC channel */
+   .base = 0x17c8,
+   .offset = 0x30,
+   .limit = 0x30,
+   },
+   {
+   /* NVDEC MMIO */
+   .base = 0x1698,
+   .offset = 0x34,
+   .limit = 0x34,
+   },
 };
 
 static const struct host1x_info host1x08_info = {
-- 
2.37.0



[PATCH v3 4/8] arm64: tegra: Add NVDEC on Tegra234

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

Add a device tree node for NVDEC on Tegra234.

Booting the firmware requires some information regarding offsets
within the firmware binary. These are passed through the device
tree, but since the values vary depending on the firmware version,
and the firmware itself is not available to the OS, the flasher is
expected to provide a device tree overlay with values corresponding
to the firmware it is flashing. The overlay then replaces the
placeholder values here.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra234.dtsi | 27 
 1 file changed, 27 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
index 81a0f599685f..65d49b27bc5f 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
@@ -586,6 +586,33 @@ vic@1534 {
iommus = <_niso1 TEGRA234_SID_VIC>;
dma-coherent;
};
+
+		nvdec@15480000 {
+			compatible = "nvidia,tegra234-nvdec";
+			reg = <0x15480000 0x00040000>;
+			clocks = <&bpmp TEGRA234_CLK_NVDEC>,
+				 <&bpmp TEGRA234_CLK_FUSE>,
+				 <&bpmp TEGRA234_CLK_TSEC_PKA>;
+			clock-names = "nvdec", "fuse", "tsec_pka";
+			resets = <&bpmp TEGRA234_RESET_NVDEC>;
+			reset-names = "nvdec";
+			power-domains = <&bpmp TEGRA234_POWER_DOMAIN_NVDEC>;
+			interconnects = <&mc TEGRA234_MEMORY_CLIENT_NVDECSRD &emc>,
+					<&mc TEGRA234_MEMORY_CLIENT_NVDECSWR &emc>;
+			interconnect-names = "dma-mem", "write";
+			iommus = <&smmu_niso1 TEGRA234_SID_NVDEC>;
+			dma-coherent;
+
+			nvidia,memory-controller = <&mc>;
+
+			/* Placeholder values, to be replaced with values from overlay */
+			nvidia,bl-manifest-offset = <0>;
+			nvidia,bl-data-offset = <0>;
+			nvidia,bl-code-offset = <0>;
+			nvidia,os-manifest-offset = <0>;
+			nvidia,os-data-offset = <0>;
+			nvidia,os-code-offset = <0>;
+		};
};
 
gpio: gpio@220 {
-- 
2.37.0



[PATCH v3 0/8] Support for NVDEC on Tegra234

2022-09-20 Thread Mikko Perttunen
From: Mikko Perttunen 

v3:
* Updated patch 3 based on comments

v2:
* Updated patches 1,3 based on comments
* Added Acked-by to patch 2

Original message:

Hi all,

this series adds support for the HW video decoder, NVDEC,
on Tegra234 (Orin). The main change is a switch from Falcon
to RISC-V for the internal microcontroller, which brings along
a change in how the engine is booted. Otherwise it is backwards
compatible with earlier versions.

In previous iterations, firmware was simply loaded from disk and
written into engine internal memory. Now, the engine has a
bootrom that loads the firmware from a carveout where it has been
loaded by the system bootloader; however, we still need to tell it
where that carveout is loaded and some offsets into it. For that,
the first patch adds a new memory controller API to query the
carveout address. The offsets are read from device tree -- the
expectation is that at flashing time (when the firmware is also
flashed), the flasher also delivers a device tree overlay with
values corresponding to the flashed firmware.

The currently available Linux for Tegra release doesn't yet
include this device tree overlay flashing, and the firmware version
it contains is incompatible with this series. The plan is to fix
that for the next Linux for Tegra release, but if necessary, we
can postpone merging of this series to once those changes are
available.

Thanks!
Mikko

Mikko Perttunen (8):
  memory: tegra: Add API for retrieving carveout bounds
  dt-bindings: Add headers for NVDEC on Tegra234
  dt-bindings: Add bindings for Tegra234 NVDEC
  arm64: tegra: Add NVDEC on Tegra234
  gpu: host1x: Add stream ID register data for NVDEC on Tegra234
  drm/tegra: nvdec: Support multiple clocks
  drm/tegra: Add code for booting RISC-V based engines
  drm/tegra: Add Tegra234 support to NVDEC driver

 .../gpu/host1x/nvidia,tegra234-nvdec.yaml | 156 
 arch/arm64/boot/dts/nvidia/tegra234.dtsi  |  27 +++
 drivers/gpu/drm/tegra/Makefile|   3 +-
 drivers/gpu/drm/tegra/drm.c   |   1 +
 drivers/gpu/drm/tegra/nvdec.c | 171 +++---
 drivers/gpu/drm/tegra/riscv.c | 106 +++
 drivers/gpu/drm/tegra/riscv.h |  30 +++
 drivers/gpu/host1x/dev.c  |  12 ++
 drivers/memory/tegra/mc.c |  25 +++
 drivers/memory/tegra/tegra234.c   |   5 +
 include/dt-bindings/clock/tegra234-clock.h|   4 +
 include/dt-bindings/memory/tegra234-mc.h  |   3 +
 .../dt-bindings/power/tegra234-powergate.h|   1 +
 include/dt-bindings/reset/tegra234-reset.h|   1 +
 include/soc/tegra/mc.h|  11 ++
 15 files changed, 530 insertions(+), 26 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
 create mode 100644 drivers/gpu/drm/tegra/riscv.c
 create mode 100644 drivers/gpu/drm/tegra/riscv.h

-- 
2.37.0



Re: [PATCH v2 3/8] dt-bindings: Add bindings for Tegra234 NVDEC

2022-09-14 Thread Mikko Perttunen

On 9/14/22 15:08, Rob Herring wrote:

On Tue, Sep 13, 2022 at 04:14:41PM +0300, Mikko Perttunen wrote:

From: Mikko Perttunen 

Update NVDEC bindings for Tegra234. This new engine version only has
two memory clients, but now requires three clocks, and as a bigger
change the engine loads firmware from a secure carveout configured by
the bootloader.

For the latter, we need to add a phandle to the memory controller
to query the location of this carveout, and several other properties
containing offsets into the firmware inside the carveout. These
properties are intended to be populated through a device tree overlay
configured at flashing time, so that the values correspond to the
flashed NVDEC firmware.

As the binding was getting large with many conditional properties,
also split the Tegra234 version out into a separate file.

Signed-off-by: Mikko Perttunen 
---
v2:
- Split out into separate file to avoid complexity with
   conditionals etc.
---
  .../gpu/host1x/nvidia,tegra234-nvdec.yaml | 154 ++
  1 file changed, 154 insertions(+)
  create mode 100644 
Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml

diff --git 
a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml 
b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
new file mode 100644
index 000000000000..eab0475ca983
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra234-nvdec.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Device tree binding for NVIDIA Tegra234 NVDEC
+
+description: |
+  NVDEC is the hardware video decoder present on NVIDIA Tegra210
+  and newer chips. It is located on the Host1x bus and typically
+  programmed through Host1x channels.
+
+maintainers:
+  - Thierry Reding 
+  - Mikko Perttunen 
+
+properties:
+  $nodename:
+pattern: "^nvdec@[0-9a-f]*$"
+
+  compatible:
+enum:
+  - nvidia,tegra234-nvdec
+
+  reg:
+maxItems: 1
+
+  clocks:
+maxItems: 3
+
+  clock-names:
+items:
+  - const: nvdec
+  - const: fuse
+  - const: tsec_pka
+
+  resets:
+maxItems: 1
+
+  reset-names:
+items:
+  - const: nvdec
+
+  power-domains:
+maxItems: 1
+
+  iommus:
+maxItems: 1
+
+  dma-coherent: true
+
+  interconnects:
+items:
+  - description: DMA read memory client
+  - description: DMA write memory client
+
+  interconnect-names:
+items:
+  - const: dma-mem
+  - const: write
+
+  nvidia,memory-controller:
+$ref: /schemas/types.yaml#/definitions/phandle
+description:
+  phandle to the memory controller for determining carveout information.
+
+  nvidia,bl-manifest-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader manifest from beginning of firmware. Typically set 
as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,bl-code-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader code section from beginning of firmware. Typically 
set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,bl-data-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader data section from beginning of firmware. Typically 
set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,os-manifest-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system manifest from beginning of firmware. 
Typically set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,os-code-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system code section from beginning of firmware. 
Typically set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,os-data-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system data section from beginning of firmware. 
Typically set as
+  part of a device tree overlay corresponding to flashed firmware.


I don't think DT is the place for describing your runtime loaded
firmware layout.

Rob


The way I see it, from the kernel's point of view it's not runtime 
loaded but a contract with the bootloader. Bootloader sets up hardware 
in a certain way the kernel doesn't otherwise know so the bootloader 
needs to tell the kernel how the hardware is set up.


The fact that the information is supplied through an overlay is 
accidental -- equivalently the bootloader that sets up the firmware 
could adjust the device tree like we do in other situations, but in this 
case an overlay is an e

[PATCH v2 3/8] dt-bindings: Add bindings for Tegra234 NVDEC

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

Update NVDEC bindings for Tegra234. This new engine version only has
two memory clients, but now requires three clocks, and as a bigger
change the engine loads firmware from a secure carveout configured by
the bootloader.

For the latter, we need to add a phandle to the memory controller
to query the location of this carveout, and several other properties
containing offsets into the firmware inside the carveout. These
properties are intended to be populated through a device tree overlay
configured at flashing time, so that the values correspond to the
flashed NVDEC firmware.

As the binding was getting large with many conditional properties,
also split the Tegra234 version out into a separate file.

Signed-off-by: Mikko Perttunen 
---
v2:
- Split out into separate file to avoid complexity with
  conditionals etc.
---
 .../gpu/host1x/nvidia,tegra234-nvdec.yaml | 154 ++
 1 file changed, 154 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml

diff --git 
a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml 
b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
new file mode 100644
index 000000000000..eab0475ca983
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
@@ -0,0 +1,154 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra234-nvdec.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Device tree binding for NVIDIA Tegra234 NVDEC
+
+description: |
+  NVDEC is the hardware video decoder present on NVIDIA Tegra210
+  and newer chips. It is located on the Host1x bus and typically
+  programmed through Host1x channels.
+
+maintainers:
+  - Thierry Reding 
+  - Mikko Perttunen 
+
+properties:
+  $nodename:
+pattern: "^nvdec@[0-9a-f]*$"
+
+  compatible:
+enum:
+  - nvidia,tegra234-nvdec
+
+  reg:
+maxItems: 1
+
+  clocks:
+maxItems: 3
+
+  clock-names:
+items:
+  - const: nvdec
+  - const: fuse
+  - const: tsec_pka
+
+  resets:
+maxItems: 1
+
+  reset-names:
+items:
+  - const: nvdec
+
+  power-domains:
+maxItems: 1
+
+  iommus:
+maxItems: 1
+
+  dma-coherent: true
+
+  interconnects:
+items:
+  - description: DMA read memory client
+  - description: DMA write memory client
+
+  interconnect-names:
+items:
+  - const: dma-mem
+  - const: write
+
+  nvidia,memory-controller:
+$ref: /schemas/types.yaml#/definitions/phandle
+description:
+  phandle to the memory controller for determining carveout information.
+
+  nvidia,bl-manifest-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader manifest from beginning of firmware. Typically set 
as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,bl-code-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader code section from beginning of firmware. Typically 
set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,bl-data-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to bootloader data section from beginning of firmware. Typically 
set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,os-manifest-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system manifest from beginning of firmware. 
Typically set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,os-code-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system code section from beginning of firmware. 
Typically set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+  nvidia,os-data-offset:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  Offset to operating system data section from beginning of firmware. 
Typically set as
+  part of a device tree overlay corresponding to flashed firmware.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - power-domains
+  - nvidia,memory-controller
+  - nvidia,bl-manifest-offset
+  - nvidia,bl-code-offset
+  - nvidia,bl-data-offset
+  - nvidia,os-manifest-offset
+  - nvidia,os-code-offset
+  - nvidia,os-data-offset
+
+additionalProperties: false
+
+examples:
+  - |
+#include 
+#include 
+#include 
+#include 
+
+nvdec@1548 {
+compatible = "nvidia,tegra234-nvdec";
+reg = <0x1548 0x0004>;
+clocks = < TEGRA234_CLK_NVDEC>,
+ < TEGRA234_CLK_FUSE>,
+ < TEGRA2

[PATCH v2 7/8] drm/tegra: Add code for booting RISC-V based engines

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

Add helper code for booting RISC-V based engines where firmware is
located in a carveout.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/Makefile |   3 +-
 drivers/gpu/drm/tegra/riscv.c  | 106 +
 drivers/gpu/drm/tegra/riscv.h  |  30 ++
 3 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tegra/riscv.c
 create mode 100644 drivers/gpu/drm/tegra/riscv.h

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index df6cc986aeba..bb0d2c144b55 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -24,7 +24,8 @@ tegra-drm-y := \
gr3d.o \
falcon.o \
vic.o \
-   nvdec.o
+   nvdec.o \
+   riscv.o
 
 tegra-drm-y += trace.o
 
diff --git a/drivers/gpu/drm/tegra/riscv.c b/drivers/gpu/drm/tegra/riscv.c
new file mode 100644
index ..6580416408f8
--- /dev/null
+++ b/drivers/gpu/drm/tegra/riscv.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, NVIDIA Corporation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "riscv.h"
+
+#define RISCV_CPUCTL   0x4388
+#define RISCV_CPUCTL_STARTCPU_TRUE (1 << 0)
+#define RISCV_BR_RETCODE   0x465c
+#define RISCV_BR_RETCODE_RESULT_V(x)   ((x) & 0x3)
+#define RISCV_BR_RETCODE_RESULT_PASS_V 3
+#define RISCV_BCR_CTRL 0x4668
+#define RISCV_BCR_CTRL_CORE_SELECT_RISCV   (1 << 4)
+#define RISCV_BCR_DMACFG   0x466c
+#define RISCV_BCR_DMACFG_TARGET_LOCAL_FB   (0 << 0)
+#define RISCV_BCR_DMACFG_LOCK_LOCKED   (1 << 31)
+#define RISCV_BCR_DMAADDR_PKCPARAM_LO  0x4670
+#define RISCV_BCR_DMAADDR_PKCPARAM_HI  0x4674
+#define RISCV_BCR_DMAADDR_FMCCODE_LO   0x4678
+#define RISCV_BCR_DMAADDR_FMCCODE_HI   0x467c
+#define RISCV_BCR_DMAADDR_FMCDATA_LO   0x4680
+#define RISCV_BCR_DMAADDR_FMCDATA_HI   0x4684
+#define RISCV_BCR_DMACFG_SEC   0x4694
+#define RISCV_BCR_DMACFG_SEC_GSCID(v)  ((v) << 16)
+
+static void riscv_writel(struct tegra_drm_riscv *riscv, u32 value, u32 offset)
+{
+   writel(value, riscv->regs + offset);
+}
+
+int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv)
+{
+   struct tegra_drm_riscv_descriptor *bl = >bl_desc;
+   struct tegra_drm_riscv_descriptor *os = >os_desc;
+   const struct device_node *np = riscv->dev->of_node;
+   int err;
+
+#define READ_PROP(name, location) \
+   err = of_property_read_u32(np, name, location); \
+   if (err) { \
+   dev_err(riscv->dev, "failed to read " name ": %d\n", err); \
+   return err; \
+   }
+
+   READ_PROP("nvidia,bl-manifest-offset", >manifest_offset);
+   READ_PROP("nvidia,bl-code-offset", >code_offset);
+   READ_PROP("nvidia,bl-data-offset", >data_offset);
+   READ_PROP("nvidia,os-manifest-offset", >manifest_offset);
+   READ_PROP("nvidia,os-code-offset", >code_offset);
+   READ_PROP("nvidia,os-data-offset", >data_offset);
+#undef READ_PROP
+
+   if (bl->manifest_offset == 0 && bl->code_offset == 0 &&
+   bl->data_offset == 0 && os->manifest_offset == 0 &&
+   os->code_offset == 0 && os->data_offset == 0) {
+   dev_err(riscv->dev, "descriptors not available\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t 
image_address,
+u32 gscid, const struct 
tegra_drm_riscv_descriptor *desc)
+{
+   phys_addr_t addr;
+   int err;
+   u32 val;
+
+   riscv_writel(riscv, RISCV_BCR_CTRL_CORE_SELECT_RISCV, RISCV_BCR_CTRL);
+
+   addr = image_address + desc->manifest_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_PKCPARAM_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_PKCPARAM_HI);
+
+   addr = image_address + desc->code_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCCODE_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCCODE_HI);
+
+   addr = image_address + desc->data_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCDATA_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCDATA_HI);
+
+   riscv_writel(riscv

[PATCH v2 1/8] memory: tegra: Add API for retrieving carveout bounds

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

On Tegra234, the NVDEC firmware is loaded from a secure carveout, where it
has been placed by a bootloader. When booting NVDEC, we need to tell it
the address of this firmware, which we can determine by checking the
starting address of the carveout. As such, add an MC API to query the
bounds of carveouts, and add related information on Tegra234.

Signed-off-by: Mikko Perttunen 
---
v2:
- Add check for 64-bit phys_addr_t. In practice, phys_addr_t
  is always 64 bits where this runs, but the check avoids
  warnings in compile tests.
---
 drivers/memory/tegra/mc.c   | 25 +
 drivers/memory/tegra/tegra234.c |  5 +
 include/soc/tegra/mc.h  | 11 +++
 3 files changed, 41 insertions(+)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 2f7a58a9df1a..592907546ee6 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -107,6 +107,31 @@ int tegra_mc_probe_device(struct tegra_mc *mc, struct 
device *dev)
 }
 EXPORT_SYMBOL_GPL(tegra_mc_probe_device);
 
+int tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size)
+{
+   u32 offset;
+
+   if (id < 1 || id >= mc->soc->num_carveouts)
+   return -EINVAL;
+
+   if (id < 6)
+   offset = 0xc0c + 0x50 * (id - 1);
+   else
+   offset = 0x2004 + 0x50 * (id - 6);
+
+   *base = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 0x0);
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+   *base |= (phys_addr_t)mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 
0x4) << 32;
+#endif
+
+   if (size)
+   *size = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 0x8) << 
17;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(tegra_mc_get_carveout_info);
+
 static int tegra_mc_block_dma_common(struct tegra_mc *mc,
 const struct tegra_mc_reset *rst)
 {
diff --git a/drivers/memory/tegra/tegra234.c b/drivers/memory/tegra/tegra234.c
index a9e8fd99730f..74d291d66366 100644
--- a/drivers/memory/tegra/tegra234.c
+++ b/drivers/memory/tegra/tegra234.c
@@ -187,4 +187,9 @@ const struct tegra_mc_soc tegra234_mc_soc = {
.ops = _mc_ops,
.ch_intmask = 0xff00,
.global_intstatus_channel_shift = 8,
+   /*
+* Additionally, there are lite carveouts but those are not currently
+* supported.
+*/
+   .num_carveouts = 32,
 };
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index 47ce6d434427..51a2263e1bc5 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -193,6 +193,8 @@ struct tegra_mc_soc {
unsigned int num_address_bits;
unsigned int atom_size;
 
+   unsigned int num_carveouts;
+
u16 client_id_mask;
u8 num_channels;
 
@@ -244,6 +246,8 @@ unsigned int tegra_mc_get_emem_device_count(struct tegra_mc 
*mc);
 #ifdef CONFIG_TEGRA_MC
 struct tegra_mc *devm_tegra_memory_controller_get(struct device *dev);
 int tegra_mc_probe_device(struct tegra_mc *mc, struct device *dev);
+int tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size);
 #else
 static inline struct tegra_mc *
 devm_tegra_memory_controller_get(struct device *dev)
@@ -256,6 +260,13 @@ tegra_mc_probe_device(struct tegra_mc *mc, struct device 
*dev)
 {
return -ENODEV;
 }
+
+static inline int
+tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size)
+{
+   return -ENODEV;
+}
 #endif
 
 #endif /* __SOC_TEGRA_MC_H__ */
-- 
2.37.0



[PATCH v2 0/8] Support for NVDEC on Tegra234

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

v2:
* Updated patches 1,3 based on comments
* Added Acked-by to patch 2

Original message:

Hi all,

this series adds support for the HW video decoder, NVDEC,
on Tegra234 (Orin). The main change is a switch from Falcon
to RISC-V for the internal microcontroller, which brings along
a change in how the engine is booted. Otherwise it is backwards
compatible with earlier versions.

In previous iterations, firmware was simply loaded from disk and
written into engine internal memory. Now, the engine has a
bootrom that loads the firmware from a carveout where it has been
loaded by the system bootloader; however, we still need to tell it
where that carveout is loaded and some offsets into it. For that,
the first patch adds a new memory controller API to query the
carveout address. The offsets are read from device tree -- the
expectation is that at flashing time (when the firmware is also
flashed), the flasher also delivers a device tree overlay with
values corresponding to the flashed firmware.

The currently available Linux for Tegra release doesn't yet
include this device tree overlay flashing, and the firmware version
it contains is incompatible with this series. The plan is to fix
that for the next Linux for Tegra release, but if necessary, we
can postpone merging this series until those changes are
available.

Thanks!
Mikko

Mikko Perttunen (8):
  memory: tegra: Add API for retrieving carveout bounds
  dt-bindings: Add headers for NVDEC on Tegra234
  dt-bindings: Add bindings for Tegra234 NVDEC
  arm64: tegra: Add NVDEC on Tegra234
  gpu: host1x: Add stream ID register data for NVDEC on Tegra234
  drm/tegra: nvdec: Support multiple clocks
  drm/tegra: Add code for booting RISC-V based engines
  drm/tegra: Add Tegra234 support to NVDEC driver

 .../gpu/host1x/nvidia,tegra234-nvdec.yaml | 154 
 arch/arm64/boot/dts/nvidia/tegra234.dtsi  |  27 +++
 drivers/gpu/drm/tegra/Makefile|   3 +-
 drivers/gpu/drm/tegra/drm.c   |   1 +
 drivers/gpu/drm/tegra/nvdec.c | 171 +++---
 drivers/gpu/drm/tegra/riscv.c | 106 +++
 drivers/gpu/drm/tegra/riscv.h |  30 +++
 drivers/gpu/host1x/dev.c  |  12 ++
 drivers/memory/tegra/mc.c |  25 +++
 drivers/memory/tegra/tegra234.c   |   5 +
 include/dt-bindings/clock/tegra234-clock.h|   4 +
 include/dt-bindings/memory/tegra234-mc.h  |   3 +
 .../dt-bindings/power/tegra234-powergate.h|   1 +
 include/dt-bindings/reset/tegra234-reset.h|   1 +
 include/soc/tegra/mc.h|  11 ++
 15 files changed, 528 insertions(+), 26 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
 create mode 100644 drivers/gpu/drm/tegra/riscv.c
 create mode 100644 drivers/gpu/drm/tegra/riscv.h

-- 
2.37.0



[PATCH v2 6/8] drm/tegra: nvdec: Support multiple clocks

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

NVDEC on Tegra234 requires multiple clocks. Add support for that.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/nvdec.c | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 276fe0472730..05af4d107421 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2015-2021, NVIDIA Corporation.
+ * Copyright (c) 2015-2022, NVIDIA Corporation.
  */
 
 #include 
@@ -28,6 +28,7 @@ struct nvdec_config {
const char *firmware;
unsigned int version;
bool supports_sid;
+   bool has_extra_clocks;
 };
 
 struct nvdec {
@@ -37,7 +38,8 @@ struct nvdec {
struct tegra_drm_client client;
struct host1x_channel *channel;
struct device *dev;
-   struct clk *clk;
+   struct clk_bulk_data clks[3];
+   unsigned int num_clks;
 
/* Platform configuration */
const struct nvdec_config *config;
@@ -258,7 +260,7 @@ static __maybe_unused int nvdec_runtime_resume(struct 
device *dev)
struct nvdec *nvdec = dev_get_drvdata(dev);
int err;
 
-   err = clk_prepare_enable(nvdec->clk);
+   err = clk_bulk_prepare_enable(nvdec->num_clks, nvdec->clks);
if (err < 0)
return err;
 
@@ -275,7 +277,7 @@ static __maybe_unused int nvdec_runtime_resume(struct 
device *dev)
return 0;
 
 disable:
-   clk_disable_unprepare(nvdec->clk);
+   clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
return err;
 }
 
@@ -285,7 +287,7 @@ static __maybe_unused int nvdec_runtime_suspend(struct 
device *dev)
 
host1x_channel_stop(nvdec->channel);
 
-   clk_disable_unprepare(nvdec->clk);
+   clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
 
return 0;
 }
@@ -383,13 +385,22 @@ static int nvdec_probe(struct platform_device *pdev)
if (IS_ERR(nvdec->regs))
return PTR_ERR(nvdec->regs);
 
-   nvdec->clk = devm_clk_get(dev, NULL);
-   if (IS_ERR(nvdec->clk)) {
-   dev_err(>dev, "failed to get clock\n");
-   return PTR_ERR(nvdec->clk);
+   nvdec->clks[0].id = "nvdec";
+   nvdec->num_clks = 1;
+
+   if (nvdec->config->has_extra_clocks) {
+   nvdec->num_clks = 3;
+   nvdec->clks[1].id = "fuse";
+   nvdec->clks[2].id = "tsec_pka";
+   }
+
+   err = devm_clk_bulk_get(dev, nvdec->num_clks, nvdec->clks);
+   if (err) {
+   dev_err(>dev, "failed to get clock(s)\n");
+   return err;
}
 
-   err = clk_set_rate(nvdec->clk, ULONG_MAX);
+   err = clk_set_rate(nvdec->clks[0].clk, ULONG_MAX);
if (err < 0) {
dev_err(>dev, "failed to set clock rate\n");
return err;
-- 
2.37.0



[PATCH v2 5/8] gpu: host1x: Add stream ID register data for NVDEC on Tegra234

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

Add entries for NVDEC to the Tegra234 SID table.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 0cd3f97e7e49..d6b4614f968f 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -225,6 +225,18 @@ static const struct host1x_sid_entry tegra234_sid_table[] 
= {
.offset = 0x34,
.limit = 0x34
},
+   {
+   /* NVDEC channel */
+   .base = 0x17c8,
+   .offset = 0x30,
+   .limit = 0x30,
+   },
+   {
+   /* NVDEC MMIO */
+   .base = 0x1698,
+   .offset = 0x34,
+   .limit = 0x34,
+   },
 };
 
 static const struct host1x_info host1x08_info = {
-- 
2.37.0



[PATCH v2 2/8] dt-bindings: Add headers for NVDEC on Tegra234

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

Add clock, memory controller, powergate and reset dt-binding headers
necessary for NVDEC.

Signed-off-by: Mikko Perttunen 
Acked-by: Krzysztof Kozlowski 
---
 include/dt-bindings/clock/tegra234-clock.h | 4 
 include/dt-bindings/memory/tegra234-mc.h   | 3 +++
 include/dt-bindings/power/tegra234-powergate.h | 1 +
 include/dt-bindings/reset/tegra234-reset.h | 1 +
 4 files changed, 9 insertions(+)

diff --git a/include/dt-bindings/clock/tegra234-clock.h 
b/include/dt-bindings/clock/tegra234-clock.h
index 173364a93381..25b4a3fb4588 100644
--- a/include/dt-bindings/clock/tegra234-clock.h
+++ b/include/dt-bindings/clock/tegra234-clock.h
@@ -82,6 +82,8 @@
 #define TEGRA234_CLK_I2S6  66U
 /** @brief clock recovered from I2S6 input */
 #define TEGRA234_CLK_I2S6_SYNC_INPUT   67U
+/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_NVDEC */
+#define TEGRA234_CLK_NVDEC  83U
 /** PLL controlled by CLK_RST_CONTROLLER_PLLA_BASE for use by audio clocks */
 #define TEGRA234_CLK_PLLA  93U
 /** @brief PLLP clk output */
@@ -130,6 +132,8 @@
 #define TEGRA234_CLK_SYNC_I2S5 149U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S6 */
 #define TEGRA234_CLK_SYNC_I2S6 150U
+/** output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PKA */
+#define TEGRA234_CLK_TSEC_PKA   154U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */
 #define TEGRA234_CLK_UARTA 155U
 /** @brief output of gate CLK_ENB_PEX1_CORE_6 */
diff --git a/include/dt-bindings/memory/tegra234-mc.h 
b/include/dt-bindings/memory/tegra234-mc.h
index 62987b47ce81..75f0bd30d365 100644
--- a/include/dt-bindings/memory/tegra234-mc.h
+++ b/include/dt-bindings/memory/tegra234-mc.h
@@ -32,6 +32,7 @@
 #define TEGRA234_SID_PCIE100x0b
 #define TEGRA234_SID_BPMP  0x10
 #define TEGRA234_SID_HOST1X0x27
+#define TEGRA234_SID_NVDEC 0x29
 #define TEGRA234_SID_VIC   0x34
 
 /*
@@ -91,6 +92,8 @@
 #define TEGRA234_MEMORY_CLIENT_SDMMCWAB 0x67
 #define TEGRA234_MEMORY_CLIENT_VICSRD 0x6c
 #define TEGRA234_MEMORY_CLIENT_VICSWR 0x6d
+#define TEGRA234_MEMORY_CLIENT_NVDECSRD 0x78
+#define TEGRA234_MEMORY_CLIENT_NVDECSWR 0x79
 /* BPMP read client */
 #define TEGRA234_MEMORY_CLIENT_BPMPR 0x93
 /* BPMP write client */
diff --git a/include/dt-bindings/power/tegra234-powergate.h 
b/include/dt-bindings/power/tegra234-powergate.h
index ae9286cef85c..e5dc1e00be95 100644
--- a/include/dt-bindings/power/tegra234-powergate.h
+++ b/include/dt-bindings/power/tegra234-powergate.h
@@ -19,6 +19,7 @@
 #define TEGRA234_POWER_DOMAIN_MGBEB18U
 #define TEGRA234_POWER_DOMAIN_MGBEC19U
 #define TEGRA234_POWER_DOMAIN_MGBED20U
+#define TEGRA234_POWER_DOMAIN_NVDEC 23U
 #define TEGRA234_POWER_DOMAIN_VIC  29U
 
 #endif
diff --git a/include/dt-bindings/reset/tegra234-reset.h 
b/include/dt-bindings/reset/tegra234-reset.h
index d48d22b2bc7f..17163019316c 100644
--- a/include/dt-bindings/reset/tegra234-reset.h
+++ b/include/dt-bindings/reset/tegra234-reset.h
@@ -30,6 +30,7 @@
 #define TEGRA234_RESET_I2C733U
 #define TEGRA234_RESET_I2C834U
 #define TEGRA234_RESET_I2C935U
+#define TEGRA234_RESET_NVDEC44U
 #define TEGRA234_RESET_MGBE0_PCS   45U
 #define TEGRA234_RESET_MGBE0_MAC   46U
 #define TEGRA234_RESET_MGBE1_PCS   49U
-- 
2.37.0



[PATCH v2 8/8] drm/tegra: Add Tegra234 support to NVDEC driver

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

Add support for the Tegra234 version of NVDEC to the NVDEC driver.
This version sports a RISC-V controller and requires a few additional
clocks. After firmware has been loaded, the behavior is, however,
backwards compatible.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c   |   1 +
 drivers/gpu/drm/tegra/nvdec.c | 140 ++
 2 files changed, 126 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 6748ec1e0005..a014f11e9edb 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1382,6 +1382,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
{ .compatible = "nvidia,tegra194-vic", },
{ .compatible = "nvidia,tegra194-nvdec", },
{ .compatible = "nvidia,tegra234-vic", },
+   { .compatible = "nvidia,tegra234-nvdec", },
{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 05af4d107421..10fd21517281 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -16,18 +17,21 @@
 #include 
 #include 
 
-#include 
+#include 
 
 #include "drm.h"
 #include "falcon.h"
+#include "riscv.h"
 #include "vic.h"
 
+#define NVDEC_FALCON_DEBUGINFO 0x1094
 #define NVDEC_TFBIF_TRANSCFG   0x2c44
 
 struct nvdec_config {
const char *firmware;
unsigned int version;
bool supports_sid;
+   bool has_riscv;
bool has_extra_clocks;
 };
 
@@ -40,9 +44,14 @@ struct nvdec {
struct device *dev;
struct clk_bulk_data clks[3];
unsigned int num_clks;
+   struct reset_control *reset;
 
/* Platform configuration */
const struct nvdec_config *config;
+
+   /* RISC-V specific data */
+   struct tegra_drm_riscv riscv;
+   phys_addr_t carveout_base;
 };
 
 static inline struct nvdec *to_nvdec(struct tegra_drm_client *client)
@@ -56,7 +65,7 @@ static inline void nvdec_writel(struct nvdec *nvdec, u32 
value,
writel(value, nvdec->regs + offset);
 }
 
-static int nvdec_boot(struct nvdec *nvdec)
+static int nvdec_boot_falcon(struct nvdec *nvdec)
 {
 #ifdef CONFIG_IOMMU_API
struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev);
@@ -92,6 +101,64 @@ static int nvdec_boot(struct nvdec *nvdec)
return 0;
 }
 
+static int nvdec_wait_debuginfo(struct nvdec *nvdec, const char *phase)
+{
+   int err;
+   u32 val;
+
+   err = readl_poll_timeout(nvdec->regs + NVDEC_FALCON_DEBUGINFO, val, val 
== 0x0, 10, 10);
+   if (err) {
+   dev_err(nvdec->dev, "failed to boot %s, debuginfo=0x%x\n", 
phase, val);
+   return err;
+   }
+
+   return 0;
+}
+
+static int nvdec_boot_riscv(struct nvdec *nvdec)
+{
+   int err;
+
+   err = reset_control_acquire(nvdec->reset);
+   if (err)
+   return err;
+
+   nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+   err = tegra_drm_riscv_boot_bootrom(>riscv, nvdec->carveout_base, 
1,
+  >riscv.bl_desc);
+   if (err) {
+   dev_err(nvdec->dev, "failed to execute bootloader\n");
+   goto release_reset;
+   }
+
+   err = nvdec_wait_debuginfo(nvdec, "bootloader");
+   if (err)
+   goto release_reset;
+
+   err = reset_control_reset(nvdec->reset);
+   if (err)
+   goto release_reset;
+
+   nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+   err = tegra_drm_riscv_boot_bootrom(>riscv, nvdec->carveout_base, 
1,
+  >riscv.os_desc);
+   if (err) {
+   dev_err(nvdec->dev, "failed to execute firmware\n");
+   goto release_reset;
+   }
+
+   err = nvdec_wait_debuginfo(nvdec, "firmware");
+   if (err)
+   goto release_reset;
+
+release_reset:
+   reset_control_release(nvdec->reset);
+
+   return err;
+}
+
 static int nvdec_init(struct host1x_client *client)
 {
struct tegra_drm_client *drm = host1x_to_drm_client(client);
@@ -191,7 +258,7 @@ static const struct host1x_client_ops nvdec_client_ops = {
.exit = nvdec_exit,
 };
 
-static int nvdec_load_firmware(struct nvdec *nvdec)
+static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
 {
struct host1x_client *client = >client.base;
struct tegra_drm *tegra = nvdec->client.drm;
@@ -254,7 +321,6 @@ static int nvdec_load_firmware(struct nvdec *nvdec)
return err;
 }
 
-
 static __maybe_unused int nvdec_runtime_resume(struct device *dev)
 {
struct nvdec *nvdec 

[PATCH v2 4/8] arm64: tegra: Add NVDEC on Tegra234

2022-09-13 Thread Mikko Perttunen
From: Mikko Perttunen 

Add a device tree node for NVDEC on Tegra234.

Booting the firmware requires some information regarding offsets
within the firmware binary. These are passed through the device
tree, but since the values vary depending on the firmware version,
and the firmware itself is not available to the OS, the flasher is
expected to provide a device tree overlay with values corresponding
to the firmware it is flashing. The overlay then replaces the
placeholder values here.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra234.dtsi | 27 
 1 file changed, 27 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
index 81a0f599685f..65d49b27bc5f 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
@@ -586,6 +586,33 @@ vic@1534 {
iommus = <_niso1 TEGRA234_SID_VIC>;
dma-coherent;
};
+
+   nvdec@1548 {
+   compatible = "nvidia,tegra234-nvdec";
+   reg = <0x1548 0x0004>;
+   clocks = < TEGRA234_CLK_NVDEC>,
+< TEGRA234_CLK_FUSE>,
+< TEGRA234_CLK_TSEC_PKA>;
+   clock-names = "nvdec", "fuse", "tsec_pka";
+   resets = < TEGRA234_RESET_NVDEC>;
+   reset-names = "nvdec";
+   power-domains = < 
TEGRA234_POWER_DOMAIN_NVDEC>;
+   interconnects = < 
TEGRA234_MEMORY_CLIENT_NVDECSRD >,
+   < 
TEGRA234_MEMORY_CLIENT_NVDECSWR >;
+   interconnect-names = "dma-mem", "write";
+   iommus = <_niso1 TEGRA234_SID_NVDEC>;
+   dma-coherent;
+
+   nvidia,memory-controller = <>;
+
+   /* Placeholder values, to be replaced with values from overlay */
+   nvidia,bl-manifest-offset = <0>;
+   nvidia,bl-data-offset = <0>;
+   nvidia,bl-code-offset = <0>;
+   nvidia,os-manifest-offset = <0>;
+   nvidia,os-data-offset = <0>;
+   nvidia,os-code-offset = <0>;
+   };
};
 
gpio: gpio@220 {
-- 
2.37.0



Re: [PATCH 0/8] Support for NVDEC on Tegra234

2022-09-07 Thread Mikko Perttunen

On 7.9.2022 13.58, Krzysztof Kozlowski wrote:

On 07/09/2022 07:27, Mikko Perttunen wrote:

On 9/6/22 20:50, Krzysztof Kozlowski wrote:

On 06/09/2022 15:28, Mikko Perttunen wrote:

From: Mikko Perttunen 

Hi all,

this series adds support for the HW video decoder, NVDEC,
on Tegra234 (Orin). The main change is a switch from Falcon
to RISC-V for the internal microcontroller, which brings along
a change in how the engine is booted. Otherwise it is backwards
compatible with earlier versions.


You need to describe the dependencies, otherwise I would be free to go
with applying memory controllers part.


Hi Krzysztof,

the memory controller patch can be applied independently.


OK then... but looking at the code it does not seem to. Anyway kbuild
robot complained so I expect v2.


Ah, indeed: although patch 1 can be applied on top of current trees, patch
8 does require patch 1 to be there first, which is, thinking about it
now, necessary information as well.


Thanks for the reviews.

Mikko



Best regards,
Krzysztof


[PATCH 3/3] arm64: tegra: Add context isolation domains on Tegra234

2022-09-07 Thread Mikko Perttunen
From: Mikko Perttunen 

Add Host1x context isolation domains on Tegra234. On Tegra234 we have
two IOMMUs that are connected to Host1x-channel programmed engines,
so we have to include domains for each of them.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra234.dtsi | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
index 65d49b27bc5f..d764bd98433a 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
@@ -570,6 +570,25 @@ host1x@13e0 {
interconnect-names = "dma-mem";
iommus = <_niso1 TEGRA234_SID_HOST1X>;
 
+   /* Context isolation domains */
+   iommu-map = <
+   0 _niso0 TEGRA234_SID_HOST1X_CTX0 1
+   1 _niso0 TEGRA234_SID_HOST1X_CTX1 1
+   2 _niso0 TEGRA234_SID_HOST1X_CTX2 1
+   3 _niso0 TEGRA234_SID_HOST1X_CTX3 1
+   4 _niso0 TEGRA234_SID_HOST1X_CTX4 1
+   5 _niso0 TEGRA234_SID_HOST1X_CTX5 1
+   6 _niso0 TEGRA234_SID_HOST1X_CTX6 1
+   7 _niso0 TEGRA234_SID_HOST1X_CTX7 1
+   8 _niso1 TEGRA234_SID_HOST1X_CTX0 1
+   9 _niso1 TEGRA234_SID_HOST1X_CTX1 1
+   10 _niso1 TEGRA234_SID_HOST1X_CTX2 1
+   11 _niso1 TEGRA234_SID_HOST1X_CTX3 1
+   12 _niso1 TEGRA234_SID_HOST1X_CTX4 1
+   13 _niso1 TEGRA234_SID_HOST1X_CTX5 1
+   14 _niso1 TEGRA234_SID_HOST1X_CTX6 1
+   15 _niso1 TEGRA234_SID_HOST1X_CTX7 1>;
+
vic@1534 {
compatible = "nvidia,tegra234-vic";
reg = <0x1534 0x0004>;
-- 
2.37.0



[PATCH 2/3] dt-bindings: Add Host1x context stream IDs on Tegra234

2022-09-07 Thread Mikko Perttunen
From: Mikko Perttunen 

Add defines for stream IDs used for Host1x context isolation
on Tegra234. The same stream IDs are used for both NISO0 and
NISO1 SMMUs since Host1x's stream ID protection tables don't
make a distinction between the two.

Signed-off-by: Mikko Perttunen 
---
 include/dt-bindings/memory/tegra234-mc.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/dt-bindings/memory/tegra234-mc.h 
b/include/dt-bindings/memory/tegra234-mc.h
index 75f0bd30d365..d9b21b64ed73 100644
--- a/include/dt-bindings/memory/tegra234-mc.h
+++ b/include/dt-bindings/memory/tegra234-mc.h
@@ -35,6 +35,16 @@
 #define TEGRA234_SID_NVDEC 0x29
 #define TEGRA234_SID_VIC   0x34
 
+/* Shared stream IDs */
+#define TEGRA234_SID_HOST1X_CTX0   0x35
+#define TEGRA234_SID_HOST1X_CTX1   0x36
+#define TEGRA234_SID_HOST1X_CTX2   0x37
+#define TEGRA234_SID_HOST1X_CTX3   0x38
+#define TEGRA234_SID_HOST1X_CTX4   0x39
+#define TEGRA234_SID_HOST1X_CTX5   0x3a
+#define TEGRA234_SID_HOST1X_CTX6   0x3b
+#define TEGRA234_SID_HOST1X_CTX7   0x3c
+
 /*
  * memory client IDs
  */
-- 
2.37.0



[PATCH 1/3] gpu: host1x: Select context device based on attached IOMMU

2022-09-07 Thread Mikko Perttunen
From: Mikko Perttunen 

On Tegra234, engines that are programmed through Host1x channels can
be attached to either the NISO0 or NISO1 SMMU. Because of that, when
selecting a context device to use with an engine, we need to select
one that is also attached to the same SMMU.

Add a parameter to host1x_memory_context_alloc to specify which device
we are allocating a context for, and use it to pick an appropriate
context device.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/uapi.c | 2 +-
 drivers/gpu/host1x/context.c | 5 +
 include/linux/host1x.h   | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
index a98239cb0e29..5adab6b22916 100644
--- a/drivers/gpu/drm/tegra/uapi.c
+++ b/drivers/gpu/drm/tegra/uapi.c
@@ -116,7 +116,7 @@ int tegra_drm_ioctl_channel_open(struct drm_device *drm, 
void *data, struct drm_
 
if (supported)
context->memory_context = host1x_memory_context_alloc(
-   host, get_task_pid(current, PIDTYPE_TGID));
+   host, client->base.dev, get_task_pid(current, 
PIDTYPE_TGID));
 
if (IS_ERR(context->memory_context)) {
if (PTR_ERR(context->memory_context) != -EOPNOTSUPP) {
diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
index b08cf11f9a66..8d6447cdd882 100644
--- a/drivers/gpu/host1x/context.c
+++ b/drivers/gpu/host1x/context.c
@@ -104,6 +104,7 @@ void host1x_memory_context_list_free(struct 
host1x_memory_context_list *cdl)
 }
 
 struct host1x_memory_context *host1x_memory_context_alloc(struct host1x 
*host1x,
+ struct device *dev,
  struct pid *pid)
 {
struct host1x_memory_context_list *cdl = >context_list;
@@ -118,6 +119,10 @@ struct host1x_memory_context 
*host1x_memory_context_alloc(struct host1x *host1x,
for (i = 0; i < cdl->len; i++) {
struct host1x_memory_context *cd = >devs[i];
 
+   if (cd->dev.iommu->iommu_dev != dev->iommu->iommu_dev) {
+   continue;
+   }
+
if (cd->owner == pid) {
refcount_inc(>ref);
mutex_unlock(>lock);
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index cb2100d9b0ff..ef05de1f4f1e 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -469,6 +469,7 @@ struct host1x_memory_context {
 
 #ifdef CONFIG_IOMMU_API
 struct host1x_memory_context *host1x_memory_context_alloc(struct host1x 
*host1x,
+ struct device *dev,
  struct pid *pid);
 void host1x_memory_context_get(struct host1x_memory_context *cd);
 void host1x_memory_context_put(struct host1x_memory_context *cd);
-- 
2.37.0



[PATCH 0/3] Host1x context isolation on Tegra234

2022-09-07 Thread Mikko Perttunen
From: Mikko Perttunen 

Hi all,

this series adds support for Host1x context isolation
on Tegra234 (Orin). The difference from previous generations
is that there are two IOMMUs to which engines can be attached,
so a separate set of contexts is needed for each one.

Patches should be applied in order. The first patch is needed
so that the kernel doesn't attempt to use a context device
attached to the wrong IOMMU when context stream IDs are present
in the device tree. The other two patches have a compile time
dependency.

Thanks,
Mikko

Mikko Perttunen (3):
  gpu: host1x: Select context device based on attached IOMMU
  dt-bindings: Add Host1x context stream IDs on Tegra234
  arm64: tegra: Add context isolation domains on Tegra234

 arch/arm64/boot/dts/nvidia/tegra234.dtsi | 19 +++
 drivers/gpu/drm/tegra/uapi.c |  2 +-
 drivers/gpu/host1x/context.c |  5 +
 include/dt-bindings/memory/tegra234-mc.h | 10 ++
 include/linux/host1x.h   |  1 +
 5 files changed, 36 insertions(+), 1 deletion(-)

-- 
2.37.0



Re: [PATCH 0/8] Support for NVDEC on Tegra234

2022-09-06 Thread Mikko Perttunen

On 9/6/22 20:50, Krzysztof Kozlowski wrote:

On 06/09/2022 15:28, Mikko Perttunen wrote:

From: Mikko Perttunen 

Hi all,

this series adds support for the HW video decoder, NVDEC,
on Tegra234 (Orin). The main change is a switch from Falcon
to RISC-V for the internal microcontroller, which brings along
a change in how the engine is booted. Otherwise it is backwards
compatible with earlier versions.


You need to describe the dependencies, otherwise I would be free to go
with applying memory controllers part.


Hi Krzysztof,

the memory controller patch can be applied independently.

Thanks,
Mikko



Best regards,
Krzysztof




[PATCH 3/8] dt-bindings: Add bindings for Tegra234 NVDEC

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

Update NVDEC bindings for Tegra234. This new engine version only has
two memory clients, but now requires three clocks, and as a bigger
change the engine loads firmware from a secure carveout configured by
the bootloader.

For the latter, we need to add a phandle to the memory controller
to query the location of this carveout, and several other properties
containing offsets into the firmware inside the carveout. These
properties are intended to be populated through a device tree overlay
configured at flashing time, so that the values correspond to the
flashed NVDEC firmware.

Signed-off-by: Mikko Perttunen 
---
 .../gpu/host1x/nvidia,tegra210-nvdec.yaml | 118 +++---
 1 file changed, 98 insertions(+), 20 deletions(-)

diff --git 
a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml 
b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml
index 3cf862976448..27128a195b66 100644
--- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml
+++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml
@@ -24,17 +24,11 @@ properties:
   - nvidia,tegra210-nvdec
   - nvidia,tegra186-nvdec
   - nvidia,tegra194-nvdec
+  - nvidia,tegra234-nvdec
 
   reg:
 maxItems: 1
 
-  clocks:
-maxItems: 1
-
-  clock-names:
-items:
-  - const: nvdec
-
   resets:
 maxItems: 1
 
@@ -50,18 +44,6 @@ properties:
 
   dma-coherent: true
 
-  interconnects:
-items:
-  - description: DMA read memory client
-  - description: DMA read 2 memory client
-  - description: DMA write memory client
-
-  interconnect-names:
-items:
-  - const: dma-mem
-  - const: read-1
-  - const: write
-
   nvidia,host1x-class:
 description: |
   Host1x class of the engine, used to specify the targeted engine
@@ -79,7 +61,103 @@ required:
   - reset-names
   - power-domains
 
-additionalProperties: false
+unevaluatedProperties: false
+
+allOf:
+  - if:
+  properties:
+compatible:
+  contains:
+enum:
+  - nvidia,tegra234-nvdec
+then:
+  properties:
+clocks:
+  items:
+- description: NVDEC clock
+- description: FUSE clock
+- description: TSEC_PKA clock
+clock-names:
+  items:
+- const: nvdec
+- const: fuse
+- const: tsec_pka
+interconnects:
+  items:
+- description: DMA read memory client
+- description: DMA write memory client
+interconnect-names:
+  items:
+- const: dma-mem
+- const: write
+nvidia,memory-controller:
+  $ref: /schemas/types.yaml#/definitions/phandle
+  description:
+phandle to the memory controller for determining carveout 
information.
+nvidia,bl-manifest-offset:
+  $ref: /schemas/types.yaml#/definitions/uint32
+  description:
+Offset to bootloader manifest from beginning of firmware. 
Typically set as
+part of a device tree overlay corresponding to flashed firmware.
+nvidia,bl-code-offset:
+  $ref: /schemas/types.yaml#/definitions/uint32
+  description:
+Offset to bootloader code section from beginning of firmware. 
Typically set as
+part of a device tree overlay corresponding to flashed firmware.
+nvidia,bl-data-offset:
+  $ref: /schemas/types.yaml#/definitions/uint32
+  description:
+Offset to bootloader data section from beginning of firmware. 
Typically set as
+part of a device tree overlay corresponding to flashed firmware.
+nvidia,os-manifest-offset:
+  $ref: /schemas/types.yaml#/definitions/uint32
+  description:
+Offset to operating system manifest from beginning of firmware. 
Typically set as
+part of a device tree overlay corresponding to flashed firmware.
+nvidia,os-code-offset:
+  $ref: /schemas/types.yaml#/definitions/uint32
+  description:
+Offset to operating system code section from beginning of 
firmware. Typically set as
+part of a device tree overlay corresponding to flashed firmware.
+nvidia,os-data-offset:
+  $ref: /schemas/types.yaml#/definitions/uint32
+  description:
+Offset to operating system data section from beginning of 
firmware. Typically set as
+part of a device tree overlay corresponding to flashed firmware.
+  required:
+- nvidia,memory-controller
+- nvidia,bl-manifest-offset
+- nvidia,bl-code-offset
+- nvidia,bl-data-offset
+- nvidia,os-manifest-offset
+- nvidia,os-code-offset
+- nvidia,os-data-offset
+  - if:
+  properties:
+compatible:
+  contains:
+enum:
+  - nvidia

[PATCH 2/8] dt-bindings: Add headers for NVDEC on Tegra234

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

Add clock, memory controller, powergate and reset dt-binding headers
necessary for NVDEC.

Signed-off-by: Mikko Perttunen 
---
 include/dt-bindings/clock/tegra234-clock.h | 4 
 include/dt-bindings/memory/tegra234-mc.h   | 3 +++
 include/dt-bindings/power/tegra234-powergate.h | 1 +
 include/dt-bindings/reset/tegra234-reset.h | 1 +
 4 files changed, 9 insertions(+)

diff --git a/include/dt-bindings/clock/tegra234-clock.h 
b/include/dt-bindings/clock/tegra234-clock.h
index 173364a93381..25b4a3fb4588 100644
--- a/include/dt-bindings/clock/tegra234-clock.h
+++ b/include/dt-bindings/clock/tegra234-clock.h
@@ -82,6 +82,8 @@
 #define TEGRA234_CLK_I2S6  66U
 /** @brief clock recovered from I2S6 input */
 #define TEGRA234_CLK_I2S6_SYNC_INPUT   67U
+/** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_NVDEC */
+#define TEGRA234_CLK_NVDEC  83U
 /** PLL controlled by CLK_RST_CONTROLLER_PLLA_BASE for use by audio clocks */
 #define TEGRA234_CLK_PLLA  93U
 /** @brief PLLP clk output */
@@ -130,6 +132,8 @@
 #define TEGRA234_CLK_SYNC_I2S5 149U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_AUDIO_SYNC_CLK_I2S6 
*/
 #define TEGRA234_CLK_SYNC_I2S6 150U
+/** output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_PKA */
+#define TEGRA234_CLK_TSEC_PKA   154U
 /** @brief output of mux controlled by CLK_RST_CONTROLLER_CLK_SOURCE_UARTA */
 #define TEGRA234_CLK_UARTA 155U
 /** @brief output of gate CLK_ENB_PEX1_CORE_6 */
diff --git a/include/dt-bindings/memory/tegra234-mc.h 
b/include/dt-bindings/memory/tegra234-mc.h
index 62987b47ce81..75f0bd30d365 100644
--- a/include/dt-bindings/memory/tegra234-mc.h
+++ b/include/dt-bindings/memory/tegra234-mc.h
@@ -32,6 +32,7 @@
 #define TEGRA234_SID_PCIE100x0b
 #define TEGRA234_SID_BPMP  0x10
 #define TEGRA234_SID_HOST1X0x27
+#define TEGRA234_SID_NVDEC 0x29
 #define TEGRA234_SID_VIC   0x34
 
 /*
@@ -91,6 +92,8 @@
 #define TEGRA234_MEMORY_CLIENT_SDMMCWAB 0x67
 #define TEGRA234_MEMORY_CLIENT_VICSRD 0x6c
 #define TEGRA234_MEMORY_CLIENT_VICSWR 0x6d
+#define TEGRA234_MEMORY_CLIENT_NVDECSRD 0x78
+#define TEGRA234_MEMORY_CLIENT_NVDECSWR 0x79
 /* BPMP read client */
 #define TEGRA234_MEMORY_CLIENT_BPMPR 0x93
 /* BPMP write client */
diff --git a/include/dt-bindings/power/tegra234-powergate.h 
b/include/dt-bindings/power/tegra234-powergate.h
index ae9286cef85c..e5dc1e00be95 100644
--- a/include/dt-bindings/power/tegra234-powergate.h
+++ b/include/dt-bindings/power/tegra234-powergate.h
@@ -19,6 +19,7 @@
 #define TEGRA234_POWER_DOMAIN_MGBEB18U
 #define TEGRA234_POWER_DOMAIN_MGBEC19U
 #define TEGRA234_POWER_DOMAIN_MGBED20U
+#define TEGRA234_POWER_DOMAIN_NVDEC 23U
 #define TEGRA234_POWER_DOMAIN_VIC  29U
 
 #endif
diff --git a/include/dt-bindings/reset/tegra234-reset.h 
b/include/dt-bindings/reset/tegra234-reset.h
index d48d22b2bc7f..17163019316c 100644
--- a/include/dt-bindings/reset/tegra234-reset.h
+++ b/include/dt-bindings/reset/tegra234-reset.h
@@ -30,6 +30,7 @@
 #define TEGRA234_RESET_I2C733U
 #define TEGRA234_RESET_I2C834U
 #define TEGRA234_RESET_I2C935U
+#define TEGRA234_RESET_NVDEC44U
 #define TEGRA234_RESET_MGBE0_PCS   45U
 #define TEGRA234_RESET_MGBE0_MAC   46U
 #define TEGRA234_RESET_MGBE1_PCS   49U
-- 
2.37.0



[PATCH 1/8] memory: tegra: Add API for retrieving carveout bounds

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

On Tegra234 NVDEC firmware is loaded from a secure carveout, where it
has been loaded by a bootloader. When booting NVDEC, we need to tell it
the address of this firmware, which we can determine by checking the
starting address of the carveout. As such, add an MC API to query the
bounds of carveouts, and add related information on Tegra234.

Signed-off-by: Mikko Perttunen 
---
 drivers/memory/tegra/mc.c   | 23 +++
 drivers/memory/tegra/tegra234.c |  5 +
 include/soc/tegra/mc.h  | 11 +++
 3 files changed, 39 insertions(+)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 2f7a58a9df1a..4650300d3ec3 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -107,6 +107,29 @@ int tegra_mc_probe_device(struct tegra_mc *mc, struct 
device *dev)
 }
 EXPORT_SYMBOL_GPL(tegra_mc_probe_device);
 
+int tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size)
+{
+   u32 offset;
+
+   if (id < 1 || id >= mc->soc->num_carveouts)
+   return -EINVAL;
+
+   if (id < 6)
+   offset = 0xc0c + 0x50 * (id - 1);
+   else
+   offset = 0x2004 + 0x50 * (id - 6);
+
+   *base = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 0x0);
+   *base |= (phys_addr_t)mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 
0x4) << 32;
+
+   if (size)
+   *size = mc_ch_readl(mc, MC_BROADCAST_CHANNEL, offset + 0x8) << 
17;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(tegra_mc_get_carveout_info);
+
 static int tegra_mc_block_dma_common(struct tegra_mc *mc,
 const struct tegra_mc_reset *rst)
 {
diff --git a/drivers/memory/tegra/tegra234.c b/drivers/memory/tegra/tegra234.c
index a9e8fd99730f..74d291d66366 100644
--- a/drivers/memory/tegra/tegra234.c
+++ b/drivers/memory/tegra/tegra234.c
@@ -187,4 +187,9 @@ const struct tegra_mc_soc tegra234_mc_soc = {
.ops = _mc_ops,
.ch_intmask = 0xff00,
.global_intstatus_channel_shift = 8,
+   /*
+* Additionally, there are lite carveouts but those are not currently
+* supported.
+*/
+   .num_carveouts = 32,
 };
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index 47ce6d434427..51a2263e1bc5 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -193,6 +193,8 @@ struct tegra_mc_soc {
unsigned int num_address_bits;
unsigned int atom_size;
 
+   unsigned int num_carveouts;
+
u16 client_id_mask;
u8 num_channels;
 
@@ -244,6 +246,8 @@ unsigned int tegra_mc_get_emem_device_count(struct tegra_mc 
*mc);
 #ifdef CONFIG_TEGRA_MC
 struct tegra_mc *devm_tegra_memory_controller_get(struct device *dev);
 int tegra_mc_probe_device(struct tegra_mc *mc, struct device *dev);
+int tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size);
 #else
 static inline struct tegra_mc *
 devm_tegra_memory_controller_get(struct device *dev)
@@ -256,6 +260,13 @@ tegra_mc_probe_device(struct tegra_mc *mc, struct device 
*dev)
 {
return -ENODEV;
 }
+
+static inline int
+tegra_mc_get_carveout_info(struct tegra_mc *mc, unsigned int id,
+   phys_addr_t *base, u64 *size)
+{
+   return -ENODEV;
+}
 #endif
 
 #endif /* __SOC_TEGRA_MC_H__ */
-- 
2.37.0



[PATCH 0/8] Support for NVDEC on Tegra234

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

Hi all,

this series adds support for the HW video decoder, NVDEC,
on Tegra234 (Orin). The main change is a switch from Falcon
to RISC-V for the internal microcontroller, which brings along
a change in how the engine is booted. Otherwise it is backwards
compatible with earlier versions.

In previous iterations, firmware was simply loaded from disk and
written into engine internal memory. Now, the engine has a
bootrom that loads the firmware from a carveout where it has been
loaded by the system bootloader; however, we still need to tell it
where that carveout is loaded and some offsets into it. For that,
the first patch adds a new memory controller API to query the
carveout address. The offsets are read from device tree -- the
expectation is that at flashing time (when the firmware is also
flashed), the flasher also delivers a device tree overlay with
values corresponding to the flashed firmware.

The currently available Linux for Tegra release doesn't yet
include this device tree overlay flashing, and the firmware version
it contains is incompatible with this series. The plan is to fix
that for the next Linux for Tegra release, but if necessary, we
can postpone merging this series until those changes are
available.

Thanks!
Mikko

Mikko Perttunen (8):
  memory: tegra: Add API for retrieving carveout bounds
  dt-bindings: Add headers for NVDEC on Tegra234
  dt-bindings: Add bindings for Tegra234 NVDEC
  arm64: tegra: Add NVDEC on Tegra234
  gpu: host1x: Add stream ID register data for NVDEC on Tegra234
  drm/tegra: nvdec: Support multiple clocks
  drm/tegra: Add code for booting RISC-V based engines
  drm/tegra: Add Tegra234 support to NVDEC driver

 .../gpu/host1x/nvidia,tegra210-nvdec.yaml | 118 ++--
 arch/arm64/boot/dts/nvidia/tegra234.dtsi  |  27 +++
 drivers/gpu/drm/tegra/Makefile|   3 +-
 drivers/gpu/drm/tegra/drm.c   |   1 +
 drivers/gpu/drm/tegra/nvdec.c | 171 +++---
 drivers/gpu/drm/tegra/riscv.c | 106 +++
 drivers/gpu/drm/tegra/riscv.h |  30 +++
 drivers/gpu/host1x/dev.c  |  12 ++
 drivers/memory/tegra/mc.c |  23 +++
 drivers/memory/tegra/tegra234.c   |   5 +
 include/dt-bindings/clock/tegra234-clock.h|   4 +
 include/dt-bindings/memory/tegra234-mc.h  |   3 +
 .../dt-bindings/power/tegra234-powergate.h|   1 +
 include/dt-bindings/reset/tegra234-reset.h|   1 +
 include/soc/tegra/mc.h|  11 ++
 15 files changed, 470 insertions(+), 46 deletions(-)
 create mode 100644 drivers/gpu/drm/tegra/riscv.c
 create mode 100644 drivers/gpu/drm/tegra/riscv.h

-- 
2.37.0



[PATCH 4/8] arm64: tegra: Add NVDEC on Tegra234

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

Add a device tree node for NVDEC on Tegra234.

Booting the firmware requires some information regarding offsets
within the firmware binary. These are passed through the device
tree, but since the values vary depending on the firmware version,
and the firmware itself is not available to the OS, the flasher is
expected to provide a device tree overlay with values corresponding
to the firmware it is flashing. The overlay then replaces the
placeholder values here.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra234.dtsi | 27 
 1 file changed, 27 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
index 81a0f599685f..65d49b27bc5f 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
@@ -586,6 +586,33 @@ vic@1534 {
iommus = <_niso1 TEGRA234_SID_VIC>;
dma-coherent;
};
+
+   nvdec@1548 {
+   compatible = "nvidia,tegra234-nvdec";
+   reg = <0x1548 0x0004>;
+   clocks = < TEGRA234_CLK_NVDEC>,
+< TEGRA234_CLK_FUSE>,
+< TEGRA234_CLK_TSEC_PKA>;
+   clock-names = "nvdec", "fuse", "tsec_pka";
+   resets = < TEGRA234_RESET_NVDEC>;
+   reset-names = "nvdec";
+   power-domains = < 
TEGRA234_POWER_DOMAIN_NVDEC>;
+   interconnects = < 
TEGRA234_MEMORY_CLIENT_NVDECSRD >,
+   < 
TEGRA234_MEMORY_CLIENT_NVDECSWR >;
+   interconnect-names = "dma-mem", "write";
+   iommus = <_niso1 TEGRA234_SID_NVDEC>;
+   dma-coherent;
+
+   nvidia,memory-controller = <>;
+
+   /* Placeholder values, to be replaced with 
values from overlay */
+   nvidia,bl-manifest-offset = <0>;
+   nvidia,bl-data-offset = <0>;
+   nvidia,bl-code-offset = <0>;
+   nvidia,os-manifest-offset = <0>;
+   nvidia,os-data-offset = <0>;
+   nvidia,os-code-offset = <0>;
+   };
};
 
gpio: gpio@220 {
-- 
2.37.0



[PATCH 5/8] gpu: host1x: Add stream ID register data for NVDEC on Tegra234

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

Add entries for NVDEC to the Tegra234 SID table.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 0cd3f97e7e49..d6b4614f968f 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -225,6 +225,18 @@ static const struct host1x_sid_entry tegra234_sid_table[] 
= {
.offset = 0x34,
.limit = 0x34
},
+   {
+   /* NVDEC channel */
+   .base = 0x17c8,
+   .offset = 0x30,
+   .limit = 0x30,
+   },
+   {
+   /* NVDEC MMIO */
+   .base = 0x1698,
+   .offset = 0x34,
+   .limit = 0x34,
+   },
 };
 
 static const struct host1x_info host1x08_info = {
-- 
2.37.0



[PATCH 6/8] drm/tegra: nvdec: Support multiple clocks

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

NVDEC on Tegra234 requires multiple clocks. Add support for that.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/nvdec.c | 31 +--
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 276fe0472730..05af4d107421 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2015-2021, NVIDIA Corporation.
+ * Copyright (c) 2015-2022, NVIDIA Corporation.
  */
 
 #include 
@@ -28,6 +28,7 @@ struct nvdec_config {
const char *firmware;
unsigned int version;
bool supports_sid;
+   bool has_extra_clocks;
 };
 
 struct nvdec {
@@ -37,7 +38,8 @@ struct nvdec {
struct tegra_drm_client client;
struct host1x_channel *channel;
struct device *dev;
-   struct clk *clk;
+   struct clk_bulk_data clks[3];
+   unsigned int num_clks;
 
/* Platform configuration */
const struct nvdec_config *config;
@@ -258,7 +260,7 @@ static __maybe_unused int nvdec_runtime_resume(struct 
device *dev)
struct nvdec *nvdec = dev_get_drvdata(dev);
int err;
 
-   err = clk_prepare_enable(nvdec->clk);
+   err = clk_bulk_prepare_enable(nvdec->num_clks, nvdec->clks);
if (err < 0)
return err;
 
@@ -275,7 +277,7 @@ static __maybe_unused int nvdec_runtime_resume(struct 
device *dev)
return 0;
 
 disable:
-   clk_disable_unprepare(nvdec->clk);
+   clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
return err;
 }
 
@@ -285,7 +287,7 @@ static __maybe_unused int nvdec_runtime_suspend(struct 
device *dev)
 
host1x_channel_stop(nvdec->channel);
 
-   clk_disable_unprepare(nvdec->clk);
+   clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
 
return 0;
 }
@@ -383,13 +385,22 @@ static int nvdec_probe(struct platform_device *pdev)
if (IS_ERR(nvdec->regs))
return PTR_ERR(nvdec->regs);
 
-   nvdec->clk = devm_clk_get(dev, NULL);
-   if (IS_ERR(nvdec->clk)) {
-   dev_err(>dev, "failed to get clock\n");
-   return PTR_ERR(nvdec->clk);
+   nvdec->clks[0].id = "nvdec";
+   nvdec->num_clks = 1;
+
+   if (nvdec->config->has_extra_clocks) {
+   nvdec->num_clks = 3;
+   nvdec->clks[1].id = "fuse";
+   nvdec->clks[2].id = "tsec_pka";
+   }
+
+   err = devm_clk_bulk_get(dev, nvdec->num_clks, nvdec->clks);
+   if (err) {
+   dev_err(>dev, "failed to get clock(s)\n");
+   return err;
}
 
-   err = clk_set_rate(nvdec->clk, ULONG_MAX);
+   err = clk_set_rate(nvdec->clks[0].clk, ULONG_MAX);
if (err < 0) {
dev_err(>dev, "failed to set clock rate\n");
return err;
-- 
2.37.0



[PATCH 7/8] drm/tegra: Add code for booting RISC-V based engines

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

Add helper code for booting RISC-V based engines where firmware is
located in a carveout.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/Makefile |   3 +-
 drivers/gpu/drm/tegra/riscv.c  | 106 +
 drivers/gpu/drm/tegra/riscv.h  |  30 ++
 3 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tegra/riscv.c
 create mode 100644 drivers/gpu/drm/tegra/riscv.h

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index df6cc986aeba..bb0d2c144b55 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -24,7 +24,8 @@ tegra-drm-y := \
gr3d.o \
falcon.o \
vic.o \
-   nvdec.o
+   nvdec.o \
+   riscv.o
 
 tegra-drm-y += trace.o
 
diff --git a/drivers/gpu/drm/tegra/riscv.c b/drivers/gpu/drm/tegra/riscv.c
new file mode 100644
index ..6580416408f8
--- /dev/null
+++ b/drivers/gpu/drm/tegra/riscv.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, NVIDIA Corporation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "riscv.h"
+
+#define RISCV_CPUCTL   0x4388
+#define RISCV_CPUCTL_STARTCPU_TRUE (1 << 0)
+#define RISCV_BR_RETCODE   0x465c
+#define RISCV_BR_RETCODE_RESULT_V(x)   ((x) & 0x3)
+#define RISCV_BR_RETCODE_RESULT_PASS_V 3
+#define RISCV_BCR_CTRL 0x4668
+#define RISCV_BCR_CTRL_CORE_SELECT_RISCV   (1 << 4)
+#define RISCV_BCR_DMACFG   0x466c
+#define RISCV_BCR_DMACFG_TARGET_LOCAL_FB   (0 << 0)
+#define RISCV_BCR_DMACFG_LOCK_LOCKED   (1 << 31)
+#define RISCV_BCR_DMAADDR_PKCPARAM_LO  0x4670
+#define RISCV_BCR_DMAADDR_PKCPARAM_HI  0x4674
+#define RISCV_BCR_DMAADDR_FMCCODE_LO   0x4678
+#define RISCV_BCR_DMAADDR_FMCCODE_HI   0x467c
+#define RISCV_BCR_DMAADDR_FMCDATA_LO   0x4680
+#define RISCV_BCR_DMAADDR_FMCDATA_HI   0x4684
+#define RISCV_BCR_DMACFG_SEC   0x4694
+#define RISCV_BCR_DMACFG_SEC_GSCID(v)  ((v) << 16)
+
+static void riscv_writel(struct tegra_drm_riscv *riscv, u32 value, u32 offset)
+{
+   writel(value, riscv->regs + offset);
+}
+
+int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv)
+{
+   struct tegra_drm_riscv_descriptor *bl = >bl_desc;
+   struct tegra_drm_riscv_descriptor *os = >os_desc;
+   const struct device_node *np = riscv->dev->of_node;
+   int err;
+
+#define READ_PROP(name, location) \
+   err = of_property_read_u32(np, name, location); \
+   if (err) { \
+   dev_err(riscv->dev, "failed to read " name ": %d\n", err); \
+   return err; \
+   }
+
+   READ_PROP("nvidia,bl-manifest-offset", >manifest_offset);
+   READ_PROP("nvidia,bl-code-offset", >code_offset);
+   READ_PROP("nvidia,bl-data-offset", >data_offset);
+   READ_PROP("nvidia,os-manifest-offset", >manifest_offset);
+   READ_PROP("nvidia,os-code-offset", >code_offset);
+   READ_PROP("nvidia,os-data-offset", >data_offset);
+#undef READ_PROP
+
+   if (bl->manifest_offset == 0 && bl->code_offset == 0 &&
+   bl->data_offset == 0 && os->manifest_offset == 0 &&
+   os->code_offset == 0 && os->data_offset == 0) {
+   dev_err(riscv->dev, "descriptors not available\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t 
image_address,
+u32 gscid, const struct 
tegra_drm_riscv_descriptor *desc)
+{
+   phys_addr_t addr;
+   int err;
+   u32 val;
+
+   riscv_writel(riscv, RISCV_BCR_CTRL_CORE_SELECT_RISCV, RISCV_BCR_CTRL);
+
+   addr = image_address + desc->manifest_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_PKCPARAM_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_PKCPARAM_HI);
+
+   addr = image_address + desc->code_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCCODE_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCCODE_HI);
+
+   addr = image_address + desc->data_offset;
+   riscv_writel(riscv, lower_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCDATA_LO);
+   riscv_writel(riscv, upper_32_bits(addr >> 8), 
RISCV_BCR_DMAADDR_FMCDATA_HI);
+
+   riscv_writel(riscv

[PATCH 8/8] drm/tegra: Add Tegra234 support to NVDEC driver

2022-09-06 Thread Mikko Perttunen
From: Mikko Perttunen 

Add support for the Tegra234 version of NVDEC to the NVDEC driver.
This version sports a RISC-V controller and requires a few additional
clocks. After firmware has been loaded, the behavior is, however,
backwards compatible.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c   |   1 +
 drivers/gpu/drm/tegra/nvdec.c | 140 ++
 2 files changed, 126 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 6748ec1e0005..a014f11e9edb 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1382,6 +1382,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
{ .compatible = "nvidia,tegra194-vic", },
{ .compatible = "nvidia,tegra194-nvdec", },
{ .compatible = "nvidia,tegra234-vic", },
+   { .compatible = "nvidia,tegra234-nvdec", },
{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 05af4d107421..10fd21517281 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -16,18 +17,21 @@
 #include 
 #include 
 
-#include 
+#include 
 
 #include "drm.h"
 #include "falcon.h"
+#include "riscv.h"
 #include "vic.h"
 
+#define NVDEC_FALCON_DEBUGINFO 0x1094
 #define NVDEC_TFBIF_TRANSCFG   0x2c44
 
 struct nvdec_config {
const char *firmware;
unsigned int version;
bool supports_sid;
+   bool has_riscv;
bool has_extra_clocks;
 };
 
@@ -40,9 +44,14 @@ struct nvdec {
struct device *dev;
struct clk_bulk_data clks[3];
unsigned int num_clks;
+   struct reset_control *reset;
 
/* Platform configuration */
const struct nvdec_config *config;
+
+   /* RISC-V specific data */
+   struct tegra_drm_riscv riscv;
+   phys_addr_t carveout_base;
 };
 
 static inline struct nvdec *to_nvdec(struct tegra_drm_client *client)
@@ -56,7 +65,7 @@ static inline void nvdec_writel(struct nvdec *nvdec, u32 
value,
writel(value, nvdec->regs + offset);
 }
 
-static int nvdec_boot(struct nvdec *nvdec)
+static int nvdec_boot_falcon(struct nvdec *nvdec)
 {
 #ifdef CONFIG_IOMMU_API
struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev);
@@ -92,6 +101,64 @@ static int nvdec_boot(struct nvdec *nvdec)
return 0;
 }
 
+static int nvdec_wait_debuginfo(struct nvdec *nvdec, const char *phase)
+{
+   int err;
+   u32 val;
+
+   err = readl_poll_timeout(nvdec->regs + NVDEC_FALCON_DEBUGINFO, val, val 
== 0x0, 10, 10);
+   if (err) {
+   dev_err(nvdec->dev, "failed to boot %s, debuginfo=0x%x\n", 
phase, val);
+   return err;
+   }
+
+   return 0;
+}
+
+static int nvdec_boot_riscv(struct nvdec *nvdec)
+{
+   int err;
+
+   err = reset_control_acquire(nvdec->reset);
+   if (err)
+   return err;
+
+   nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+   err = tegra_drm_riscv_boot_bootrom(>riscv, nvdec->carveout_base, 
1,
+  >riscv.bl_desc);
+   if (err) {
+   dev_err(nvdec->dev, "failed to execute bootloader\n");
+   goto release_reset;
+   }
+
+   err = nvdec_wait_debuginfo(nvdec, "bootloader");
+   if (err)
+   goto release_reset;
+
+   err = reset_control_reset(nvdec->reset);
+   if (err)
+   goto release_reset;
+
+   nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+   err = tegra_drm_riscv_boot_bootrom(>riscv, nvdec->carveout_base, 
1,
+  >riscv.os_desc);
+   if (err) {
+   dev_err(nvdec->dev, "failed to execute firmware\n");
+   goto release_reset;
+   }
+
+   err = nvdec_wait_debuginfo(nvdec, "firmware");
+   if (err)
+   goto release_reset;
+
+release_reset:
+   reset_control_release(nvdec->reset);
+
+   return err;
+}
+
 static int nvdec_init(struct host1x_client *client)
 {
struct tegra_drm_client *drm = host1x_to_drm_client(client);
@@ -191,7 +258,7 @@ static const struct host1x_client_ops nvdec_client_ops = {
.exit = nvdec_exit,
 };
 
-static int nvdec_load_firmware(struct nvdec *nvdec)
+static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
 {
struct host1x_client *client = >client.base;
struct tegra_drm *tegra = nvdec->client.drm;
@@ -254,7 +321,6 @@ static int nvdec_load_firmware(struct nvdec *nvdec)
return err;
 }
 
-
 static __maybe_unused int nvdec_runtime_resume(struct device *dev)
 {
struct nvdec *nvdec 

[PATCH 1/2] gpu: host1x: Initialize syncval in channel_submit

2022-07-08 Thread Mikko Perttunen
From: Mikko Perttunen 

During the refactoring of channel_submit, assignment of syncval
was moved but it is also used in channel_submit. Add this assignment
back to channel_submit as well.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/channel_hw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c 
b/drivers/gpu/host1x/hw/channel_hw.c
index 4eb7fb2e4f0a..732abe0750ff 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -319,6 +319,7 @@ static int channel_submit(struct host1x_job *job)
}
 
channel_program_cdma(job);
+   syncval = host1x_syncpt_read_max(sp);
 
/* end CDMA submit & stash pinned hMems into sync queue */
host1x_cdma_end(>cdma, job);
-- 
2.36.1



[PATCH 2/2] gpu: host1x: Generalize host1x_cdma_push_wide

2022-07-08 Thread Mikko Perttunen
From: Mikko Perttunen 

host1x_cdma_push_wide had the assumptions that the last parameter word
was a NOP opcode, and that NOP opcodes could be used in all situations.

Neither is true with the new job opcode sequence, so adjust the
function to not have these assumptions, and instead place an early
RESTART opcode when necessary to jump back to the beginning of the
pushbuffer.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/cdma.c | 24 +---
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index bb1f3c746be4..103fda055394 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -615,8 +615,8 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 
op1, u32 op2,
struct host1x_channel *channel = cdma_to_channel(cdma);
struct host1x *host1x = cdma_to_host1x(cdma);
struct push_buffer *pb = >push_buffer;
-   unsigned int needed = 2, extra = 0, i;
unsigned int space = cdma->slots_free;
+   unsigned int needed = 2, extra = 0;
 
if (host1x_debug_trace_cmdbuf)
trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
@@ -634,20 +634,14 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 
op1, u32 op2,
cdma->slots_free = space - needed;
cdma->slots_used += needed;
 
-   /*
-* Note that we rely on the fact that this is only used to submit wide
-* gather opcodes, which consist of 3 words, and they are padded with
-* a NOP to avoid having to deal with fractional slots (a slot always
-* represents 2 words). The fourth opcode passed to this function will
-* therefore always be a NOP.
-*
-* This works around a slight ambiguity when it comes to opcodes. For
-* all current host1x incarnations the NOP opcode uses the exact same
-* encoding (0x2000), so we could hard-code the value here, but a
-* new incarnation may change it and break that assumption.
-*/
-   for (i = 0; i < extra; i++)
-   host1x_pushbuffer_push(pb, op4, op4);
+   if (extra > 0) {
+   /*
+* If there isn't enough space at the tail of the pushbuffer,
+* insert a RESTART(0) here to go back to the beginning.
+* The code above adjusted the indexes appropriately.
+*/
+   host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead);
+   }
 
host1x_pushbuffer_push(pb, op1, op2);
host1x_pushbuffer_push(pb, op3, op4);
-- 
2.36.1



Re: [PATCH] gpu: host1x: Register context bus unconditionally

2022-07-08 Thread Mikko Perttunen

On 7/7/22 20:30, Robin Murphy wrote:

Conditional registration is a problem for other subsystems which may
unwittingly try to interact with host1x_context_device_bus_type in an
uninitialised state on non-Tegra platforms. A look under /sys/bus on a
typical system already reveals plenty of entries from enabled but
otherwise irrelevant configs, so let's keep things simple and register
our context bus unconditionally too.

Signed-off-by: Robin Murphy 
---
  drivers/gpu/host1x/context_bus.c | 5 -
  1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c
index b0d35b2bbe89..d9421179d7b4 100644
--- a/drivers/gpu/host1x/context_bus.c
+++ b/drivers/gpu/host1x/context_bus.c
@@ -15,11 +15,6 @@ static int __init host1x_context_device_bus_init(void)
  {
int err;
  
-	if (!of_machine_is_compatible("nvidia,tegra186") &&

-   !of_machine_is_compatible("nvidia,tegra194") &&
-   !of_machine_is_compatible("nvidia,tegra234"))
-   return 0;
-
err = bus_register(_context_device_bus_type);
if (err < 0) {
pr_err("bus type registration failed: %d\n", err);


Reviewed-by: Mikko Perttunen 

Cheers,
Mikko


[PATCH v7/v3 16/22] gpu: host1x: Allow reset to be missing

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Host1x on Tegra234 does not have a software-controllable reset line.
As such, don't bail out if we don't find one in the device tree.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 5a57165ab0f6..c28939850bf6 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -423,9 +423,6 @@ static int host1x_get_resets(struct host1x *host)
return err;
}
 
-   if (WARN_ON(!host->resets[1].rstc))
-   return -ENOENT;
-
return 0;
 }
 
-- 
2.36.1



[PATCH v7/v3 04/22] arm64: tegra: Add Host1x context stream IDs on Tegra186+

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Add Host1x context stream IDs on systems that support Host1x context
isolation. Host1x and attached engines can use these stream IDs to
allow isolation between memory used by different processes.

The specified stream IDs must match those configured by the hypervisor,
if one is present.

Signed-off-by: Mikko Perttunen 
---
v2:
* Added context devices on T194.
* Use iommu-map instead of custom property.
v4:
* Remove memory-contexts subnode.
---
 arch/arm64/boot/dts/nvidia/tegra186.dtsi | 11 +++
 arch/arm64/boot/dts/nvidia/tegra194.dtsi | 11 +++
 2 files changed, 22 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 0e9afc3e2f26..5f560f13ed93 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -1461,6 +1461,17 @@ host1x@13e0 {
 
iommus = <&smmu TEGRA186_SID_HOST1X>;
 
+   /* Context isolation domains */
+   iommu-map = <
+   0 &smmu TEGRA186_SID_HOST1X_CTX0 1
+   1 &smmu TEGRA186_SID_HOST1X_CTX1 1
+   2 &smmu TEGRA186_SID_HOST1X_CTX2 1
+   3 &smmu TEGRA186_SID_HOST1X_CTX3 1
+   4 &smmu TEGRA186_SID_HOST1X_CTX4 1
+   5 &smmu TEGRA186_SID_HOST1X_CTX5 1
+   6 &smmu TEGRA186_SID_HOST1X_CTX6 1
+   7 &smmu TEGRA186_SID_HOST1X_CTX7 1>;
+
dpaux1: dpaux@1504 {
compatible = "nvidia,tegra186-dpaux";
reg = <0x1504 0x1>;
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index d1f8248c00f4..613fd71dec25 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1769,6 +1769,17 @@ host1x@13e0 {
interconnect-names = "dma-mem";
iommus = <&smmu TEGRA194_SID_HOST1X>;
 
+   /* Context isolation domains */
+   iommu-map = <
+   0 &smmu TEGRA194_SID_HOST1X_CTX0 1
+   1 &smmu TEGRA194_SID_HOST1X_CTX1 1
+   2 &smmu TEGRA194_SID_HOST1X_CTX2 1
+   3 &smmu TEGRA194_SID_HOST1X_CTX3 1
+   4 &smmu TEGRA194_SID_HOST1X_CTX4 1
+   5 &smmu TEGRA194_SID_HOST1X_CTX5 1
+   6 &smmu TEGRA194_SID_HOST1X_CTX6 1
+   7 &smmu TEGRA194_SID_HOST1X_CTX7 1>;
+
nvdec@1514 {
compatible = "nvidia,tegra194-nvdec";
reg = <0x1514 0x0004>;
-- 
2.36.1



[PATCH v7/v3 20/22] gpu: host1x: Add MLOCK release code on Tegra234

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

With the full-featured opcode sequence using MLOCKs, we need to also
unlock those MLOCKs in the event of a timeout. However, it turns out
that on Tegra186/Tegra194, by default, we don't need to do this;
furthermore, on Tegra234 it is much simpler to do; so only implement
this on Tegra234 for the time being.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/cdma_hw.c| 34 ++
 drivers/gpu/host1x/hw/hw_host1x08_common.h |  7 +
 2 files changed, 41 insertions(+)

diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index e49cd5b8f735..1b65a10b9dfc 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -238,6 +238,37 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 
getptr)
cdma_timeout_restart(cdma, getptr);
 }
 
+static void timeout_release_mlock(struct host1x_cdma *cdma)
+{
+#if HOST1X_HW >= 8
+   /* Tegra186 and Tegra194 require a more complicated MLOCK release
+* sequence. Furthermore, those chips by default don't enforce MLOCKs,
+* so it turns out that if we don't /actually/ need MLOCKs, we can just
+* ignore them.
+*
+* As such, for now just implement this on Tegra234 where things are
+* stricter but also easy to implement.
+*/
+   struct host1x_channel *ch = cdma_to_channel(cdma);
+   struct host1x *host1x = cdma_to_host1x(cdma);
+   u32 offset;
+
+   switch (ch->client->class) {
+   case HOST1X_CLASS_VIC:
+   offset = HOST1X_COMMON_VIC_MLOCK;
+   break;
+   case HOST1X_CLASS_NVDEC:
+   offset = HOST1X_COMMON_NVDEC_MLOCK;
+   break;
+   default:
+   WARN(1, "%s was not updated for class %u", __func__, 
ch->client->class);
+   return;
+   }
+
+   host1x_common_writel(host1x, 0x0, offset);
+#endif
+}
+
 /*
  * If this timeout fires, it indicates the current sync_queue entry has
  * exceeded its TTL and the userctx should be timed out and remaining
@@ -288,6 +319,9 @@ static void cdma_timeout_handler(struct work_struct *work)
/* stop HW, resetting channel/module */
host1x_hw_cdma_freeze(host1x, cdma);
 
+   /* release any held MLOCK */
+   timeout_release_mlock(cdma);
+
host1x_cdma_update_sync_queue(cdma, ch->dev);
mutex_unlock(&cdma->lock);
 }
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h 
b/drivers/gpu/host1x/hw/hw_host1x08_common.h
index 4df28440b86b..8e0c99150ec2 100644
--- a/drivers/gpu/host1x/hw/hw_host1x08_common.h
+++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h
@@ -2,3 +2,10 @@
 /*
  * Copyright (c) 2022 NVIDIA Corporation.
  */
+
+#define HOST1X_COMMON_OFA_MLOCK0x4050
+#define HOST1X_COMMON_NVJPG1_MLOCK 0x4070
+#define HOST1X_COMMON_VIC_MLOCK0x4078
+#define HOST1X_COMMON_NVENC_MLOCK  0x407c
+#define HOST1X_COMMON_NVDEC_MLOCK  0x4080
+#define HOST1X_COMMON_NVJPG_MLOCK  0x4084
-- 
2.36.1



[PATCH v7/v3 13/22] gpu: host1x: Deduplicate hardware headers

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Host1x class information and opcodes are unchanged or backwards
compatible across SoCs so let's not duplicate them for each one
but have them in a shared header file.

At the same time, add opcode functions for acquire/release_mlock.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/host1x01_hardware.h | 114 +---
 drivers/gpu/host1x/hw/host1x02_hardware.h | 113 +---
 drivers/gpu/host1x/hw/host1x04_hardware.h | 113 +---
 drivers/gpu/host1x/hw/host1x05_hardware.h | 113 +---
 drivers/gpu/host1x/hw/host1x06_hardware.h | 128 +-
 drivers/gpu/host1x/hw/host1x07_hardware.h | 128 +-
 drivers/gpu/host1x/hw/opcodes.h   | 150 ++
 7 files changed, 156 insertions(+), 703 deletions(-)
 create mode 100644 drivers/gpu/host1x/hw/opcodes.h

diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h 
b/drivers/gpu/host1x/hw/host1x01_hardware.h
index fe59df1d3dc3..cb93d7c1808c 100644
--- a/drivers/gpu/host1x/hw/host1x01_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x01_hardware.h
@@ -15,118 +15,6 @@
 #include "hw_host1x01_sync.h"
 #include "hw_host1x01_uclass.h"
 
-static inline u32 host1x_class_host_wait_syncpt(
-   unsigned indx, unsigned threshold)
-{
-   return host1x_uclass_wait_syncpt_indx_f(indx)
-   | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-   unsigned indx, unsigned threshold)
-{
-   return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-   | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-   unsigned indx, unsigned base_indx, unsigned offset)
-{
-   return host1x_uclass_wait_syncpt_base_indx_f(indx)
-   | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-   | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-   unsigned base_indx, unsigned offset)
-{
-   return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-   | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-   unsigned cond, unsigned indx)
-{
-   return host1x_uclass_incr_syncpt_cond_f(cond)
-   | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
-   unsigned mod_id, unsigned offset, bool auto_inc)
-{
-   u32 v = host1x_uclass_indoff_indbe_f(0xf)
-   | host1x_uclass_indoff_indmodid_f(mod_id)
-   | host1x_uclass_indoff_indroffset_f(offset);
-   if (auto_inc)
-   v |= host1x_uclass_indoff_autoinc_f(1);
-   return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
-   unsigned mod_id, unsigned offset, bool auto_inc)
-{
-   u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
-   | host1x_uclass_indoff_indroffset_f(offset)
-   | host1x_uclass_indoff_rwn_read_v();
-   if (auto_inc)
-   v |= host1x_uclass_indoff_autoinc_f(1);
-   return v;
-}
-
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-   unsigned class_id, unsigned offset, unsigned mask)
-{
-   return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-   return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-   return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-   return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-   return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-   return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-   host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-   return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-   return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,
unsigned count)
-{
-   return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-   return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
 
 #endif
diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h 
b/drivers/gpu/host1x/hw/host1x02_hardware.h
index af60d7fb

[PATCH v7/v3 05/22] drm/tegra: falcon: Set DMACTX field on DMA transactions

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

The DMACTX field determines which context, as specified in the
TRANSCFG register, is used. While during boot it doesn't matter
which is used, later on it matters and this value is reused by
the firmware.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/falcon.c | 8 
 drivers/gpu/drm/tegra/falcon.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index 3762d87759d9..c0d85463eb1a 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -48,6 +48,14 @@ static int falcon_copy_chunk(struct falcon *falcon,
if (target == FALCON_MEMORY_IMEM)
cmd |= FALCON_DMATRFCMD_IMEM;
 
+   /*
+* Use second DMA context (i.e. the one for firmware). Strictly
+* speaking, at this point both DMA contexts point to the firmware
+* stream ID, but this register's value will be reused by the firmware
+* for later DMA transactions, so we need to use the correct value.
+*/
+   cmd |= FALCON_DMATRFCMD_DMACTX(1);
+
falcon_writel(falcon, offset, FALCON_DMATRFMOFFS);
falcon_writel(falcon, base, FALCON_DMATRFFBOFFS);
falcon_writel(falcon, cmd, FALCON_DMATRFCMD);
diff --git a/drivers/gpu/drm/tegra/falcon.h b/drivers/gpu/drm/tegra/falcon.h
index c56ee32d92ee..1955cf11a8a6 100644
--- a/drivers/gpu/drm/tegra/falcon.h
+++ b/drivers/gpu/drm/tegra/falcon.h
@@ -50,6 +50,7 @@
 #define FALCON_DMATRFCMD_IDLE  (1 << 1)
 #define FALCON_DMATRFCMD_IMEM  (1 << 4)
 #define FALCON_DMATRFCMD_SIZE_256B (6 << 8)
+#define FALCON_DMATRFCMD_DMACTX(v) (((v) & 0x7) << 12)
 
 #define FALCON_DMATRFFBOFFS0x111c
 
-- 
2.36.1



[PATCH v7/v3 10/22] dt-bindings: host1x: Fix bracketing in example

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

The bracketing for the interrupts property in the device tree
example is incorrect. Fix it.

Signed-off-by: Mikko Perttunen 
---
 .../bindings/display/tegra/nvidia,tegra20-host1x.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml 
b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml
index e6056e9445a7..5fe25e0a8d48 100644
--- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml
+++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml
@@ -238,8 +238,8 @@ examples:
 host1x@5000 {
 compatible = "nvidia,tegra20-host1x";
 reg = <0x5000 0x00024000>;
-interrupts = <0 65 0x04   /* mpcore syncpt */
-  0 67 0x04>; /* mpcore general */
+interrupts = <0 65 0x04>, /* mpcore syncpt */
+ <0 67 0x04>; /* mpcore general */
 interrupt-names = "syncpt", "host1x";
 clocks = <&tegra_car TEGRA20_CLK_HOST1X>;
 clock-names = "host1x";
-- 
2.36.1



[PATCH v7/v3 17/22] gpu: host1x: Program interrupt destinations on Tegra234

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

On Tegra234, each Host1x VM has 8 interrupt lines. Each syncpoint
can be configured with which interrupt line should be used for
threshold interrupt, allowing for load balancing.

For now, to keep backwards compatibility, just set all syncpoints
to the first interrupt.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/hw/intr_hw.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index f56375ee6e71..9acccdb139e6 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -76,6 +76,17 @@ static void intr_hw_init(struct host1x *host, u32 cpm)
/* update host clocks per usec */
host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
 #endif
+#if HOST1X_HW >= 8
+   u32 id;
+
+   /*
+* Program threshold interrupt destination among 8 lines per VM,
+* per syncpoint. For now, just direct all to the first interrupt
+* line.
+*/
+   for (id = 0; id < host->info->nb_pts; id++)
+   host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+#endif
 }
 
 static int
-- 
2.36.1



[PATCH v7/v3 21/22] gpu: host1x: Use RESTART_W to skip timed out jobs on Tegra186+

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

When MLOCK enforcement is enabled, the 0-word write currently done
is rejected by the hardware outside of an MLOCK region. As such,
on these chips, which also have the newer, more convenient RESTART_W
opcode, use that instead to skip over the timed out job.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/cdma.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 765e5aa64eb6..bb1f3c746be4 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -457,9 +457,24 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma 
*cdma,
 * to offset 0xbad. This does nothing but
 * has a easily detected signature in debug
 * traces.
+*
+* On systems with MLOCK enforcement enabled,
+* the above 0 word writes would fall foul of
+* the enforcement. As such, in the first slot
+* put a RESTART_W opcode to the beginning
+* of the next job. We don't use this for older
+* chips since those only support the RESTART
+* opcode with inconvenient alignment 
requirements.
 */
-   mapped[2*slot+0] = 0x1bad;
-   mapped[2*slot+1] = 0x1bad;
+   if (i == 0 && host1x->info->has_wide_gather) {
+   unsigned int next_job = 
(job->first_get/8 + job->num_slots)
+   % HOST1X_PUSHBUFFER_SLOTS;
+   mapped[2*slot+0] = (0xd << 28) | 
(next_job * 2);
+   mapped[2*slot+1] = 0x0;
+   } else {
+   mapped[2*slot+0] = 0x1bad;
+   mapped[2*slot+1] = 0x1bad;
+   }
}
 
job->cancelled = true;
-- 
2.36.1



[PATCH v7/v3 00/22] Host1x context isolation / Tegra234 support

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Integrated the Host1x context isolation series (patches 1 to 8) and
Tegra234 support series (patches 9 to 22) in one email thread for
the benefit of automatic testers.

Changes from previous versions:

Context isolation:
* Improved check to ensure context devices are attached to IOMMU
* Fixed build failure when CONFIG_IOMMU_API=n as reported by bot
* Dropped Thierry's Host1x schema YAML conversion from this series
  -- it was accidentally included in the previous version
* Also dropped arm-smmu change for now. It can be merged later if
  necessary.

Tegra234:
* Split bracketing fix in DT schema to separate patch
* Added Acked-by

Thanks,
Mikko

Mikko Perttunen (22):
  dt-bindings: host1x: Add iommu-map property
  gpu: host1x: Add context device management code
  gpu: host1x: Program context stream ID on submission
  arm64: tegra: Add Host1x context stream IDs on Tegra186+
  drm/tegra: falcon: Set DMACTX field on DMA transactions
  drm/tegra: nvdec: Fix TRANSCFG register offset
  drm/tegra: Support context isolation
  drm/tegra: Implement stream ID related callbacks on engines
  dt-bindings: Add bindings for Tegra234 Host1x and VIC
  dt-bindings: host1x: Fix bracketing in example
  dt-bindings: Add headers for Host1x and VIC on Tegra234
  arm64: tegra: Add Host1x and VIC on Tegra234
  gpu: host1x: Deduplicate hardware headers
  gpu: host1x: Simplify register mapping and add common aperture
  gpu: host1x: Program virtualization tables
  gpu: host1x: Allow reset to be missing
  gpu: host1x: Program interrupt destinations on Tegra234
  gpu: host1x: Tegra234 device data and headers
  gpu: host1x: Rewrite job opcode sequence
  gpu: host1x: Add MLOCK release code on Tegra234
  gpu: host1x: Use RESTART_W to skip timed out jobs on Tegra186+
  drm/tegra: vic: Add Tegra234 support

 .../display/tegra/nvidia,tegra124-vic.yaml|   1 +
 .../display/tegra/nvidia,tegra20-host1x.yaml  | 115 +--
 arch/arm64/boot/dts/nvidia/tegra186.dtsi  |  11 ++
 arch/arm64/boot/dts/nvidia/tegra194.dtsi  |  11 ++
 arch/arm64/boot/dts/nvidia/tegra234.dtsi  |  46 +
 drivers/gpu/drm/tegra/drm.c   |   1 +
 drivers/gpu/drm/tegra/drm.h   |  11 ++
 drivers/gpu/drm/tegra/falcon.c|   8 +
 drivers/gpu/drm/tegra/falcon.h|   1 +
 drivers/gpu/drm/tegra/nvdec.c |  13 +-
 drivers/gpu/drm/tegra/submit.c|  48 -
 drivers/gpu/drm/tegra/uapi.c  |  43 -
 drivers/gpu/drm/tegra/vic.c   |  79 +++-
 drivers/gpu/host1x/Makefile   |   6 +-
 drivers/gpu/host1x/cdma.c |  19 +-
 drivers/gpu/host1x/context.c  | 160 
 drivers/gpu/host1x/context.h  |  38 
 drivers/gpu/host1x/dev.c  | 124 
 drivers/gpu/host1x/dev.h  |  13 ++
 drivers/gpu/host1x/hw/cdma_hw.c   |  34 
 drivers/gpu/host1x/hw/channel_hw.c| 136 +
 drivers/gpu/host1x/hw/host1x01_hardware.h | 114 +--
 drivers/gpu/host1x/hw/host1x02_hardware.h | 113 +--
 drivers/gpu/host1x/hw/host1x04_hardware.h | 113 +--
 drivers/gpu/host1x/hw/host1x05_hardware.h | 113 +--
 drivers/gpu/host1x/hw/host1x06_hardware.h | 118 +---
 drivers/gpu/host1x/hw/host1x07_hardware.h | 118 +---
 drivers/gpu/host1x/hw/host1x08.c  |  33 
 drivers/gpu/host1x/hw/host1x08.h  |  15 ++
 drivers/gpu/host1x/hw/host1x08_hardware.h |  21 ++
 drivers/gpu/host1x/hw/hw_host1x08_channel.h   |  11 ++
 drivers/gpu/host1x/hw/hw_host1x08_common.h|  11 ++
 .../gpu/host1x/hw/hw_host1x08_hypervisor.h|   9 +
 drivers/gpu/host1x/hw/hw_host1x08_uclass.h| 181 ++
 drivers/gpu/host1x/hw/hw_host1x08_vm.h|  36 
 drivers/gpu/host1x/hw/intr_hw.c   |  11 ++
 drivers/gpu/host1x/hw/opcodes.h   | 150 +++
 include/dt-bindings/clock/tegra234-clock.h|   4 +
 include/dt-bindings/memory/tegra234-mc.h  |   5 +
 .../dt-bindings/power/tegra234-powergate.h|   1 +
 include/dt-bindings/reset/tegra234-reset.h|   1 +
 include/linux/host1x.h|  42 
 42 files changed, 1357 insertions(+), 781 deletions(-)
 create mode 100644 drivers/gpu/host1x/context.c
 create mode 100644 drivers/gpu/host1x/context.h
 create mode 100644 drivers/gpu/host1x/hw/host1x08.c
 create mode 100644 drivers/gpu/host1x/hw/host1x08.h
 create mode 100644 drivers/gpu/host1x/hw/host1x08_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_common.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_vm.h
 create mode 100644 drivers/gpu/host1x/hw

[PATCH v7/v3 22/22] drm/tegra: vic: Add Tegra234 support

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Add Tegra234 support for VIC. It is backwards compatible with
Tegra194.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/drm/tegra/drm.c |  1 +
 drivers/gpu/drm/tegra/vic.c | 12 
 2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 9464f522e257..426ffeb51f72 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1380,6 +1380,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
{ .compatible = "nvidia,tegra194-sor", },
{ .compatible = "nvidia,tegra194-vic", },
{ .compatible = "nvidia,tegra194-nvdec", },
+   { .compatible = "nvidia,tegra234-vic", },
{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index c5526bda88d6..c1734f823d04 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -447,11 +447,20 @@ static const struct vic_config vic_t194_config = {
.supports_sid = true,
 };
 
+#define NVIDIA_TEGRA_234_VIC_FIRMWARE "nvidia/tegra234/vic.bin"
+
+static const struct vic_config vic_t234_config = {
+   .firmware = NVIDIA_TEGRA_234_VIC_FIRMWARE,
+   .version = 0x23,
+   .supports_sid = true,
+};
+
 static const struct of_device_id tegra_vic_of_match[] = {
{ .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config },
{ .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config },
{ .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config },
{ .compatible = "nvidia,tegra194-vic", .data = &vic_t194_config },
+   { .compatible = "nvidia,tegra234-vic", .data = &vic_t234_config },
{ },
 };
 MODULE_DEVICE_TABLE(of, tegra_vic_of_match);
@@ -590,3 +599,6 @@ MODULE_FIRMWARE(NVIDIA_TEGRA_186_VIC_FIRMWARE);
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC)
 MODULE_FIRMWARE(NVIDIA_TEGRA_194_VIC_FIRMWARE);
 #endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_234_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_234_VIC_FIRMWARE);
+#endif
-- 
2.36.1



[PATCH v7/v3 02/22] gpu: host1x: Add context device management code

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Add code to register context devices from device tree, allocate them
out and manage their refcounts.

Signed-off-by: Mikko Perttunen 
---
v2:
* Directly set DMA mask instead of inheriting from Host1x.
* Use iommu-map instead of custom DT property.
v4:
* Use u64 instead of dma_addr_t for DMA mask
* Use unsigned ints for indexes and adjust error handling flow
* Parse iommu-map property at top level host1x DT node
* Use separate DMA mask per device
* Export symbols as GPL
v5:
* Rename host1x_context to host1x_memory_context
v6:
* Use device_iommu_mapped to check if the context device got
  mapped to an IOMMU.
* Fix build with CONFIG_IOMMU_API disabled.
---
 drivers/gpu/host1x/Makefile  |   3 +
 drivers/gpu/host1x/context.c | 160 +++
 drivers/gpu/host1x/context.h |  38 +
 drivers/gpu/host1x/dev.c |  12 ++-
 drivers/gpu/host1x/dev.h |   2 +
 include/linux/host1x.h   |  34 
 6 files changed, 248 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/host1x/context.c
 create mode 100644 drivers/gpu/host1x/context.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index c891a3e33844..28f7aaedce0f 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -17,5 +17,8 @@ host1x-y = \
hw/host1x06.o \
hw/host1x07.o
 
+host1x-$(CONFIG_IOMMU_API) += \
+   context.o
+
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
 obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o
diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
new file mode 100644
index 000000000000..b08cf11f9a66
--- /dev/null
+++ b/drivers/gpu/host1x/context.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "context.h"
+#include "dev.h"
+
+int host1x_memory_context_list_init(struct host1x *host1x)
+{
+   struct host1x_memory_context_list *cdl = &host1x->context_list;
+   struct device_node *node = host1x->dev->of_node;
+   struct host1x_memory_context *ctx;
+   unsigned int i;
+   int err;
+
+   cdl->devs = NULL;
+   cdl->len = 0;
+   mutex_init(&cdl->lock);
+
+   err = of_property_count_u32_elems(node, "iommu-map");
+   if (err < 0)
+   return 0;
+
+   cdl->devs = kcalloc(err, sizeof(*cdl->devs), GFP_KERNEL);
+   if (!cdl->devs)
+   return -ENOMEM;
+   cdl->len = err / 4;
+
+   for (i = 0; i < cdl->len; i++) {
+   struct iommu_fwspec *fwspec;
+
+   ctx = &cdl->devs[i];
+
+   ctx->host = host1x;
+
+   device_initialize(&ctx->dev);
+
+   /*
+* Due to an issue with T194 NVENC, only 38 bits can be used.
+* Anyway, 256GiB of IOVA ought to be enough for anyone.
+*/
+   ctx->dma_mask = DMA_BIT_MASK(38);
+   ctx->dev.dma_mask = &ctx->dma_mask;
+   ctx->dev.coherent_dma_mask = ctx->dma_mask;
+   dev_set_name(&ctx->dev, "host1x-ctx.%d", i);
+   ctx->dev.bus = &host1x_context_device_bus_type;
+   ctx->dev.parent = host1x->dev;
+
+   dma_set_max_seg_size(&ctx->dev, UINT_MAX);
+
+   err = device_add(&ctx->dev);
+   if (err) {
+   dev_err(host1x->dev, "could not add context device %d: 
%d\n", i, err);
+   goto del_devices;
+   }
+
+   err = of_dma_configure_id(&ctx->dev, node, true, &i);
+   if (err) {
+   dev_err(host1x->dev, "IOMMU configuration failed for 
context device %d: %d\n",
+   i, err);
+   device_del(&ctx->dev);
+   goto del_devices;
+   }
+
+   fwspec = dev_iommu_fwspec_get(&ctx->dev);
+   if (!fwspec || !device_iommu_mapped(&ctx->dev)) {
+   dev_err(host1x->dev, "Context device %d has no 
IOMMU!\n", i);
+   device_del(&ctx->dev);
+   goto del_devices;
+   }
+
+   ctx->stream_id = fwspec->ids[0] & 0xffff;
+   }
+
+   return 0;
+
+del_devices:
+   while (i--)
+   device_del(&cdl->devs[i].dev);
+
+   kfree(cdl->devs);
+   cdl->len = 0;
+
+   return err;
+}
+
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+   unsigned int i;
+
+   for (i = 0; i < cdl->len; i++)
+   device_del(&cdl->devs[i].dev);
+
+   kfree(cdl->devs);
+   cdl->len = 0;
+}
+
+struct host1x_memory_context *host1x_memory_context_alloc(struct host1x 
*host1x,
+ 

[PATCH v7/v3 15/22] gpu: host1x: Program virtualization tables

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Program virtualization tables specifying which VMs have access to which
Host1x hardware resources. Programming these has become mandatory in
Tegra234.

For now, since the driver does not operate as a Host1x hypervisor, we
basically allow access to everything to everyone.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 21 ++---
 drivers/gpu/host1x/dev.h |  8 
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 8c6ce8014c09..5a57165ab0f6 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -217,7 +217,7 @@ static const struct of_device_id host1x_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, host1x_of_match);
 
-static void host1x_setup_sid_table(struct host1x *host)
+static void host1x_setup_virtualization_tables(struct host1x *host)
 {
const struct host1x_info *info = host->info;
unsigned int i;
@@ -231,6 +231,21 @@ static void host1x_setup_sid_table(struct host1x *host)
host1x_hypervisor_writel(host, entry->offset, entry->base);
host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
}
+
+   for (i = 0; i < info->streamid_vm_table.count; i++) {
+   /* Allow access to all stream IDs to all VMs. */
+   host1x_hypervisor_writel(host, 0xff, 
info->streamid_vm_table.base + 4 * i);
+   }
+
+   for (i = 0; i < info->classid_vm_table.count; i++) {
+   /* Allow access to all classes to all VMs. */
+   host1x_hypervisor_writel(host, 0xff, 
info->classid_vm_table.base + 4 * i);
+   }
+
+   for (i = 0; i < info->mmio_vm_table.count; i++) {
+   /* Use VM1 (that's us) as originator VMID for engine MMIO 
accesses. */
+   host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 
4 * i);
+   }
 }
 
 static bool host1x_wants_iommu(struct host1x *host1x)
@@ -602,7 +617,7 @@ static int __maybe_unused host1x_runtime_suspend(struct 
device *dev)
return 0;
 
 resume_host1x:
-   host1x_setup_sid_table(host);
+   host1x_setup_virtualization_tables(host);
host1x_syncpt_restore(host);
host1x_intr_start(host);
 
@@ -632,7 +647,7 @@ static int __maybe_unused host1x_runtime_resume(struct 
device *dev)
goto disable_clk;
}
 
-   host1x_setup_sid_table(host);
+   host1x_setup_virtualization_tables(host);
host1x_syncpt_restore(host);
host1x_intr_start(host);
 
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 85edcc6e0fc7..920e5548cfbc 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -90,6 +90,11 @@ struct host1x_sid_entry {
unsigned int limit;
 };
 
+struct host1x_table_desc {
+   unsigned int base;
+   unsigned int count;
+};
+
 struct host1x_info {
unsigned int nb_channels; /* host1x: number of channels supported */
unsigned int nb_pts; /* host1x: number of syncpoints supported */
@@ -103,6 +108,9 @@ struct host1x_info {
bool has_common; /* has common registers separate from hypervisor */
unsigned int num_sid_entries;
const struct host1x_sid_entry *sid_table;
+   struct host1x_table_desc streamid_vm_table;
+   struct host1x_table_desc classid_vm_table;
+   struct host1x_table_desc mmio_vm_table;
/*
 * On T20-T148, the boot chain may setup DC to increment syncpoints
 * 26/27 on VBLANK. As such we cannot use these syncpoints until
-- 
2.36.1



[PATCH v7/v3 09/22] dt-bindings: Add bindings for Tegra234 Host1x and VIC

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Update VIC and Host1x bindings for changes in Tegra234.

Namely,
- New compatible strings
- Sharded syncpoint interrupts
- Optional reset.

Also, fix the order of descriptions for VM/hypervisor
register apertures -- while the reg-names specification
was correct, the descriptions for these were switched.

Signed-off-by: Mikko Perttunen 
---
v2:
* Add back 'required' for resets/reset-names on older SoCs
* Simplified reg descriptions
* Updated commit message
v3:
* Split out bracketing change in example
---
 .../display/tegra/nvidia,tegra124-vic.yaml|   1 +
 .../display/tegra/nvidia,tegra20-host1x.yaml  | 106 +++---
 2 files changed, 93 insertions(+), 14 deletions(-)

diff --git 
a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra124-vic.yaml 
b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra124-vic.yaml
index 37bb5ddc1963..7200095ef19e 100644
--- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra124-vic.yaml
+++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra124-vic.yaml
@@ -21,6 +21,7 @@ properties:
   - nvidia,tegra210-vic
   - nvidia,tegra186-vic
   - nvidia,tegra194-vic
+  - nvidia,tegra234-vic
 
   - items:
   - const: nvidia,tegra132-vic
diff --git 
a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml 
b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml
index 0adeb03b9e3a..e6056e9445a7 100644
--- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml
+++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml
@@ -24,6 +24,7 @@ properties:
   - nvidia,tegra210-host1x
   - nvidia,tegra186-host1x
   - nvidia,tegra194-host1x
+  - nvidia,tegra234-host1x
 
   - items:
   - const: nvidia,tegra132-host1x
@@ -31,23 +32,19 @@ properties:
 
   reg:
 minItems: 1
-maxItems: 2
+maxItems: 3
 
   reg-names:
 minItems: 1
-maxItems: 2
+maxItems: 3
 
   interrupts:
-items:
-  - description: host1x syncpoint interrupt
-  - description: host1x general interrupt
 minItems: 1
+maxItems: 9
 
   interrupt-names:
-items:
-  - const: syncpt
-  - const: host1x
 minItems: 1
+maxItems: 9
 
   '#address-cells':
 description: The number of cells used to represent physical base addresses
@@ -110,13 +107,35 @@ required:
   - reg
   - clocks
   - clock-names
-  - resets
-  - reset-names
 
 additionalProperties:
   type: object
 
 allOf:
+  - if:
+  properties:
+compatible:
+  contains:
+enum:
+  - nvidia,tegra20-host1x
+  - nvidia,tegra30-host1x
+  - nvidia,tegra114-host1x
+  - nvidia,tegra124-host1x
+  - nvidia,tegra210-host1x
+then:
+  properties:
+interrupts:
+  items:
+- description: host1x syncpoint interrupt
+- description: host1x general interrupt
+
+interrupt-names:
+  items:
+- const: syncpt
+- const: host1x
+  required:
+- resets
+- reset-names
   - if:
   properties:
 compatible:
@@ -133,10 +152,8 @@ allOf:
 
 reg:
   items:
-- description: physical base address and length of the register
-region assigned to the VM
-- description: physical base address and length of the register
-region used by the hypervisor
+- description: region used by the hypervisor
+- description: region assigned to the virtual machine
 
 resets:
   maxItems: 1
@@ -144,6 +161,67 @@ allOf:
 reset-names:
   maxItems: 1
 
+interrupts:
+  items:
+- description: host1x syncpoint interrupt
+- description: host1x general interrupt
+
+interrupt-names:
+  items:
+- const: syncpt
+- const: host1x
+
+iommu-map:
+  description: Specification of stream IDs available for memory 
context device
+use. Should be a mapping of IDs 0..n to IOMMU entries 
corresponding to
+usable stream IDs.
+
+  required:
+- reg-names
+  - if:
+  properties:
+compatible:
+  contains:
+enum:
+  - nvidia,tegra234-host1x
+then:
+  properties:
+reg-names:
+  items:
+- const: common
+- const: hypervisor
+- const: vm
+
+reg:
+  items:
+- description: region used by host1x server
+- description: region used by the hypervisor
+- description: region assigned to the virtual machine
+
+interrupts:
+  items:
+- description: host1x syncpoint interrupt 0
+- description: host1x syncpoint interrupt 1

[PATCH v7/v3 08/22] drm/tegra: Implement stream ID related callbacks on engines

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Implement the get_streamid_offset and can_use_memory_ctx callbacks
required for supporting context isolation. Since old firmware on VIC
cannot support context isolation without hacks that we don't want to
implement, check the firmware binary to see if context isolation
should be enabled.

Signed-off-by: Mikko Perttunen 
---
v5:
* Split into two callbacks
* Add NVDEC support
v4:
* Add locking in vic_load_firmware
* Return -EOPNOTSUPP if context isolation is not available
* Update for changed get_streamid_offset declaration
* Add comment noting that vic_load_firmware is safe to call
  without the hardware being powered on

Implement context isolation related callbacks in VIC, NVDEC
---
 drivers/gpu/drm/tegra/drm.h   |  8 +
 drivers/gpu/drm/tegra/nvdec.c |  9 +
 drivers/gpu/drm/tegra/vic.c   | 67 ++-
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 2acc8f2948ad..845e60f144c7 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -100,6 +100,14 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 struct drm_tegra_submit *args, struct drm_device *drm,
 struct drm_file *file);
 
+static inline int
+tegra_drm_get_streamid_offset_thi(struct tegra_drm_client *client, u32 *offset)
+{
+   *offset = 0x30;
+
+   return 0;
+}
+
 struct tegra_drm_client {
struct host1x_client base;
struct list_head list;
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 386f9b2e78c4..a84f61709679 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -306,10 +306,19 @@ static void nvdec_close_channel(struct tegra_drm_context 
*context)
host1x_channel_put(context->channel);
 }
 
+static int nvdec_can_use_memory_ctx(struct tegra_drm_client *client, bool 
*supported)
+{
+   *supported = true;
+
+   return 0;
+}
+
 static const struct tegra_drm_client_ops nvdec_ops = {
.open_channel = nvdec_open_channel,
.close_channel = nvdec_close_channel,
.submit = tegra_drm_submit,
+   .get_streamid_offset = tegra_drm_get_streamid_offset_thi,
+   .can_use_memory_ctx = nvdec_can_use_memory_ctx,
 };
 
 #define NVIDIA_TEGRA_210_NVDEC_FIRMWARE "nvidia/tegra210/nvdec.bin"
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index f56f5921a8c2..c5526bda88d6 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -38,6 +38,8 @@ struct vic {
struct clk *clk;
struct reset_control *rst;
 
+   bool can_use_context;
+
/* Platform configuration */
const struct vic_config *config;
 };
@@ -229,28 +231,38 @@ static int vic_load_firmware(struct vic *vic)
 {
struct host1x_client *client = &vic->client.base;
struct tegra_drm *tegra = vic->client.drm;
+   static DEFINE_MUTEX(lock);
+   u32 fce_bin_data_offset;
dma_addr_t iova;
size_t size;
void *virt;
int err;
 
-   if (vic->falcon.firmware.virt)
-   return 0;
+   mutex_lock(&lock);
+
+   if (vic->falcon.firmware.virt) {
+   err = 0;
+   goto unlock;
+   }
 
err = falcon_read_firmware(&vic->falcon, vic->config->firmware);
if (err < 0)
-   return err;
+   goto unlock;
 
size = vic->falcon.firmware.size;
 
if (!client->group) {
virt = dma_alloc_coherent(vic->dev, size, &iova, GFP_KERNEL);
-   if (!virt)
-   return -ENOMEM;
+   if (!virt) {
+   err = -ENOMEM;
+   goto unlock;
+   }
} else {
virt = tegra_drm_alloc(tegra, size, &iova);
-   if (IS_ERR(virt))
-   return PTR_ERR(virt);
+   if (IS_ERR(virt)) {
+   err = PTR_ERR(virt);
+   goto unlock;
+   }
}
 
vic->falcon.firmware.virt = virt;
@@ -277,7 +289,28 @@ static int vic_load_firmware(struct vic *vic)
vic->falcon.firmware.phys = phys;
}
 
-   return 0;
+   /*
+* Check if firmware is new enough to not require mapping firmware
+* to data buffer domains.
+*/
+   fce_bin_data_offset = *(u32 *)(virt + VIC_UCODE_FCE_DATA_OFFSET);
+
+   if (!vic->config->supports_sid) {
+   vic->can_use_context = false;
+   } else if (fce_bin_data_offset != 0x0 && fce_bin_data_offset != 
0xa5a5a5a5) {
+   /*
+* Firmware will access FCE through STREAMID0, so context
+* isolation cannot be used.
+*/
+   vic->can_use_context = false;
+   dev_warn_once(vic-&g

[PATCH v7/v3 07/22] drm/tegra: Support context isolation

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

For engines that support context isolation, allocate a context when
opening a channel, and set up stream ID offset and context fields
when submitting a job.

As of this commit, the stream ID offset and fallback stream ID
are not used when context isolation is disabled. However, with
upcoming patches that enable a full featured job opcode sequence,
these will be necessary.

Signed-off-by: Mikko Perttunen 
---
v5:
* On supporting engines, always program stream ID offset and
  new fallback stream ID.
* Rename host1x_context to host1x_memory_context
v4:
* Separate error and output values in get_streamid_offset API
* Improve error handling
* Rename job->context to job->memory_context for clarity
---
 drivers/gpu/drm/tegra/drm.h|  3 +++
 drivers/gpu/drm/tegra/submit.c | 48 +-
 drivers/gpu/drm/tegra/uapi.c   | 43 --
 3 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index fc0a19554eac..2acc8f2948ad 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -80,6 +80,7 @@ struct tegra_drm_context {
 
/* Only used by new UAPI. */
struct xarray mappings;
+   struct host1x_memory_context *memory_context;
 };
 
 struct tegra_drm_client_ops {
@@ -91,6 +92,8 @@ struct tegra_drm_client_ops {
int (*submit)(struct tegra_drm_context *context,
  struct drm_tegra_submit *args, struct drm_device *drm,
  struct drm_file *file);
+   int (*get_streamid_offset)(struct tegra_drm_client *client, u32 
*offset);
+   int (*can_use_memory_ctx)(struct tegra_drm_client *client, bool 
*supported);
 };
 
 int tegra_drm_submit(struct tegra_drm_context *context,
diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index 6d6dd8c35475..b24738bdf3df 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -498,6 +498,9 @@ static void release_job(struct host1x_job *job)
struct tegra_drm_submit_data *job_data = job->user_data;
u32 i;
 
+   if (job->memory_context)
+   host1x_memory_context_put(job->memory_context);
+
for (i = 0; i < job_data->num_used_mappings; i++)
tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
 
@@ -588,11 +591,51 @@ int tegra_drm_ioctl_channel_submit(struct drm_device 
*drm, void *data,
goto put_job;
}
 
+   if (context->client->ops->get_streamid_offset) {
+   err = context->client->ops->get_streamid_offset(
+   context->client, &job->engine_streamid_offset);
+   if (err) {
+   SUBMIT_ERR(context, "failed to get streamid offset: 
%d", err);
+   goto unpin_job;
+   }
+   }
+
+   if (context->memory_context && 
context->client->ops->can_use_memory_ctx) {
+   bool supported;
+
+   err = context->client->ops->can_use_memory_ctx(context->client, 
&supported);
+   if (err) {
+   SUBMIT_ERR(context, "failed to detect if engine can use 
memory context: %d", err);
+   goto unpin_job;
+   }
+
+   if (supported) {
+   job->memory_context = context->memory_context;
+   host1x_memory_context_get(job->memory_context);
+   }
+   } else if (context->client->ops->get_streamid_offset) {
+#ifdef CONFIG_IOMMU_API
+   struct iommu_fwspec *spec;
+
+   /*
+* Job submission will need to temporarily change stream ID,
+* so need to tell it what to change it back to.
+*/
+   spec = dev_iommu_fwspec_get(context->client->base.dev);
+   if (spec && spec->num_ids > 0)
+   job->engine_fallback_streamid = spec->ids[0] & 0xffff;
+   else
+   job->engine_fallback_streamid = 0x7f;
+#else
+   job->engine_fallback_streamid = 0x7f;
+#endif
+   }
+
/* Boot engine. */
err = pm_runtime_resume_and_get(context->client->base.dev);
if (err < 0) {
SUBMIT_ERR(context, "could not power up engine: %d", err);
-   goto unpin_job;
+   goto put_memory_context;
}
 
job->user_data = job_data;
@@ -627,6 +670,9 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, 
void *data,
 
goto put_job;
 
+put_memory_context:
+   if (job->memory_context)
+   host1x_memory_context_put(job->memory_context);
 unpin_job:
host1x_job_unpin(job);
 put_job:
diff --git a/drivers/gpu/drm/t

[PATCH v7/v3 14/22] gpu: host1x: Simplify register mapping and add common aperture

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Refactor 'regs' property loading using devm_platform_ioremap_*
and add loading of the 'common' region found on Tegra234.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/dev.c | 46 +---
 drivers/gpu/host1x/dev.h |  3 +++
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 89cc79a48eab..8c6ce8014c09 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -40,6 +40,11 @@
 #include "hw/host1x06.h"
 #include "hw/host1x07.h"
 
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
+{
+   writel(v, host1x->common_regs + r);
+}
+
 void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
 {
writel(v, host1x->hv_regs + r);
@@ -412,7 +417,6 @@ static int host1x_get_resets(struct host1x *host)
 static int host1x_probe(struct platform_device *pdev)
 {
struct host1x *host;
-   struct resource *regs, *hv_regs = NULL;
int syncpt_irq;
int err;
 
@@ -423,25 +427,23 @@ static int host1x_probe(struct platform_device *pdev)
host->info = of_device_get_match_data(&pdev->dev);
 
if (host->info->has_hypervisor) {
-   regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
-   if (!regs) {
-   dev_err(&pdev->dev, "failed to get vm registers\n");
-   return -ENXIO;
-   }
+   host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
+   if (IS_ERR(host->regs))
+   return PTR_ERR(host->regs);
+
+   host->hv_regs = devm_platform_ioremap_resource_byname(pdev, 
"hypervisor");
+   if (IS_ERR(host->hv_regs))
+   return PTR_ERR(host->hv_regs);
 
-   hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-  "hypervisor");
-   if (!hv_regs) {
-   dev_err(&pdev->dev,
-   "failed to get hypervisor registers\n");
-   return -ENXIO;
+   if (host->info->has_common) {
+   host->common_regs = 
devm_platform_ioremap_resource_byname(pdev, "common");
+   if (IS_ERR(host->common_regs))
+   return PTR_ERR(host->common_regs);
}
} else {
-   regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-   if (!regs) {
-   dev_err(&pdev->dev, "failed to get registers\n");
-   return -ENXIO;
-   }
+   host->regs = devm_platform_ioremap_resource(pdev, 0);
+   if (IS_ERR(host->regs))
+   return PTR_ERR(host->regs);
}
 
syncpt_irq = platform_get_irq(pdev, 0);
@@ -456,16 +458,6 @@ static int host1x_probe(struct platform_device *pdev)
/* set common host1x device data */
platform_set_drvdata(pdev, host);
 
-   host->regs = devm_ioremap_resource(&pdev->dev, regs);
-   if (IS_ERR(host->regs))
-   return PTR_ERR(host->regs);
-
-   if (host->info->has_hypervisor) {
-   host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
-   if (IS_ERR(host->hv_regs))
-   return PTR_ERR(host->hv_regs);
-   }
-
host->dev->dma_parms = &host->dma_parms;
dma_set_max_seg_size(host->dev, UINT_MAX);
 
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 7552a4554534..85edcc6e0fc7 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -100,6 +100,7 @@ struct host1x_info {
u64 dma_mask; /* mask of addressable memory */
bool has_wide_gather; /* supports GATHER_W opcode */
bool has_hypervisor; /* has hypervisor registers */
+   bool has_common; /* has common registers separate from hypervisor */
unsigned int num_sid_entries;
const struct host1x_sid_entry *sid_table;
/*
@@ -115,6 +116,7 @@ struct host1x {
 
void __iomem *regs;
void __iomem *hv_regs; /* hypervisor region */
+   void __iomem *common_regs;
struct host1x_syncpt *syncpt;
struct host1x_syncpt_base *bases;
struct device *dev;
@@ -156,6 +158,7 @@ struct host1x {
struct host1x_bo_cache cache;
 };
 
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r);
 void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
 u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
 void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
-- 
2.36.1



[PATCH v7/v3 06/22] drm/tegra: nvdec: Fix TRANSCFG register offset

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

NVDEC's TRANSCFG register is at a different offset than VIC's.
This becomes a problem now that context isolation is enabled and
the reset value of the register is no longer sufficient.

Signed-off-by: Mikko Perttunen 
---
v6:
* New patch
---
 drivers/gpu/drm/tegra/nvdec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 79e1e88203cf..386f9b2e78c4 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -21,6 +21,8 @@
 #include "falcon.h"
 #include "vic.h"
 
+#define NVDEC_TFBIF_TRANSCFG   0x2c44
+
 struct nvdec_config {
const char *firmware;
unsigned int version;
@@ -63,7 +65,7 @@ static int nvdec_boot(struct nvdec *nvdec)
u32 value;
 
value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | TRANSCFG_ATT(0, 
TRANSCFG_SID_HW);
-   nvdec_writel(nvdec, value, VIC_TFBIF_TRANSCFG);
+   nvdec_writel(nvdec, value, NVDEC_TFBIF_TRANSCFG);
 
if (spec->num_ids > 0) {
value = spec->ids[0] & 0xffff;
-- 
2.36.1



[PATCH v7/v3 18/22] gpu: host1x: Tegra234 device data and headers

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Add device data and chip headers for Tegra234.

Signed-off-by: Mikko Perttunen 
---
 drivers/gpu/host1x/Makefile   |   3 +-
 drivers/gpu/host1x/dev.c  |  42 
 drivers/gpu/host1x/hw/host1x08.c  |  33 
 drivers/gpu/host1x/hw/host1x08.h  |  15 ++
 drivers/gpu/host1x/hw/host1x08_hardware.h |  21 ++
 drivers/gpu/host1x/hw/hw_host1x08_channel.h   |  11 ++
 drivers/gpu/host1x/hw/hw_host1x08_common.h|   4 +
 .../gpu/host1x/hw/hw_host1x08_hypervisor.h|   9 +
 drivers/gpu/host1x/hw/hw_host1x08_uclass.h| 181 ++
 drivers/gpu/host1x/hw/hw_host1x08_vm.h|  36 
 10 files changed, 354 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/host1x/hw/host1x08.c
 create mode 100644 drivers/gpu/host1x/hw/host1x08.h
 create mode 100644 drivers/gpu/host1x/hw/host1x08_hardware.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_channel.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_common.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_uclass.h
 create mode 100644 drivers/gpu/host1x/hw/hw_host1x08_vm.h

diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 28f7aaedce0f..ee5286ffe08d 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -15,7 +15,8 @@ host1x-y = \
hw/host1x04.o \
hw/host1x05.o \
hw/host1x06.o \
-   hw/host1x07.o
+   hw/host1x07.o \
+   hw/host1x08.o
 
 host1x-$(CONFIG_IOMMU_API) += \
context.o
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index c28939850bf6..0cd3f97e7e49 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -39,6 +39,7 @@
 #include "hw/host1x05.h"
 #include "hw/host1x06.h"
 #include "hw/host1x07.h"
+#include "hw/host1x08.h"
 
 void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
 {
@@ -205,7 +206,48 @@ static const struct host1x_info host1x07_info = {
.reserve_vblank_syncpts = false,
 };
 
+/*
+ * Tegra234 has two stream ID protection tables, one for setting stream IDs
+ * through the channel path via SETSTREAMID, and one for setting them via
+ * MMIO. We program each engine's data stream ID in the channel path table
+ * and firmware stream ID in the MMIO path table.
+ */
+static const struct host1x_sid_entry tegra234_sid_table[] = {
+   {
+   /* VIC channel */
+   .base = 0x17b8,
+   .offset = 0x30,
+   .limit = 0x30
+   },
+   {
+   /* VIC MMIO */
+   .base = 0x1688,
+   .offset = 0x34,
+   .limit = 0x34
+   },
+};
+
+static const struct host1x_info host1x08_info = {
+   .nb_channels = 63,
+   .nb_pts = 1024,
+   .nb_mlocks = 24,
+   .nb_bases = 0,
+   .init = host1x08_init,
+   .sync_offset = 0x0,
+   .dma_mask = DMA_BIT_MASK(40),
+   .has_wide_gather = true,
+   .has_hypervisor = true,
+   .has_common = true,
+   .num_sid_entries = ARRAY_SIZE(tegra234_sid_table),
+   .sid_table = tegra234_sid_table,
+   .streamid_vm_table = { 0x1004, 128 },
+   .classid_vm_table = { 0x1404, 25 },
+   .mmio_vm_table = { 0x1504, 25 },
+   .reserve_vblank_syncpts = false,
+};
+
 static const struct of_device_id host1x_of_match[] = {
+   { .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, },
{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c
new file mode 100644
index 000000000000..754890c34c74
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x08.h"
+#include "host1x08_hardware.h"
+
+/* include code */
+#define HOST1X_HW 8
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x08_init(struct host1x *host)
+{
+   host->channel_op = &host1x_channel_ops;
+   host->cdma_op = &host1x_cdma_ops;
+   host->cdma_pb_op = &host1x_pushbuffer_ops;
+   host->syncpt_op = &host1x_syncpt_ops;
+   host->intr_op = &host1x_intr_ops;
+   host->debug_op = &host1x_debug_ops;
+
+   return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h
new file mode 100644
index 000000000000..a6bad56e44cf
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.h
@@ -0,0 +1,

[PATCH v7/v3 12/22] arm64: tegra: Add Host1x and VIC on Tegra234

2022-06-27 Thread Mikko Perttunen
From: Mikko Perttunen 

Add device tree nodes for Host1x and VIC on Tegra234.

Signed-off-by: Mikko Perttunen 
---
 arch/arm64/boot/dts/nvidia/tegra234.dtsi | 46 
 1 file changed, 46 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
index cb3af539e477..cae68e59580c 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi
@@ -454,6 +454,52 @@ misc@10 {
status = "okay";
};
 
+   host1x@13e00000 {
+   compatible = "nvidia,tegra234-host1x";
+   reg = <0x13e00000 0x10000>,
+ <0x13e10000 0x10000>,
+ <0x13e40000 0x10000>;
+   reg-names = "common", "hypervisor", "vm";
+   interrupts = ,
+,
+,
+,
+,
+,
+,
+,
+;
+   interrupt-names = "syncpt0", "syncpt1", "syncpt2", 
"syncpt3", "syncpt4",
+ "syncpt5", "syncpt6", "syncpt7", 
"host1x";
+   clocks = <&bpmp TEGRA234_CLK_HOST1X>;
+   clock-names = "host1x";
+
+   #address-cells = <1>;
+   #size-cells = <1>;
+
+   ranges = <0x15000000 0x15000000 0x01000000>;
+   interconnects = < TEGRA234_MEMORY_CLIENT_HOST1XDMAR 
>;
+   interconnect-names = "dma-mem";
+   iommus = <&smmu_niso1 TEGRA234_SID_HOST1X>;
+
+   vic@15340000 {
+   compatible = "nvidia,tegra234-vic";
+   reg = <0x15340000 0x00040000>;
+   interrupts = ;
+   clocks = < TEGRA234_CLK_VIC>;
+   clock-names = "vic";
+   resets = < TEGRA234_RESET_VIC>;
+   reset-names = "vic";
+
+   power-domains = < 
TEGRA234_POWER_DOMAIN_VIC>;
+   interconnects = < 
TEGRA234_MEMORY_CLIENT_VICSRD >,
+   < 
TEGRA234_MEMORY_CLIENT_VICSWR >;
+   interconnect-names = "dma-mem", "write";
+   iommus = <_niso1 TEGRA234_SID_VIC>;
+   dma-coherent;
+   };
+   };
+
gpio: gpio@220 {
compatible = "nvidia,tegra234-gpio";
reg-names = "security", "gpio";
-- 
2.36.1



  1   2   3   4   5   6   7   >