On Fri, Nov 08, 2019 at 05:02:07PM +0100, Thierry Reding wrote:
> From: Thierry Reding <tred...@nvidia.com>
> 
> The purpose of BAR1 is primarily to make memory accesses coherent.
> However, some GPUs do not have BAR1 functionality. For example, the
> GV11B found on the Xavier SoC is DMA coherent and therefore doesn't
> need BAR1.
> 
> Implement a variant of FIFO channels that work without a mapping of
> instance memory through BAR1.
> 
> XXX ensure memory barriers are in place for writes
> 
> Signed-off-by: Thierry Reding <tred...@nvidia.com>
> ---
> Hi Ben,
> 
> I'm sending this a bit out of context (it's part of the larger series to
> enable basic GV11B support) because I wanted to get some early feedback
> from you on this.
> 
> For a bit of background: GV11B as it turns out doesn't really have BAR1
> anymore. The SoC has a coherency fabric which means that basically the
> GPU's system memory accesses are already coherent and hence we no longer
> need to go via BAR1 to ensure that. Functionally the same can be done by
> just writing to the memory via the CPU's virtual mapping.
> 
> So this patch implement basically a second variant of the FIFO channel
> which, instead of taking a physical address and then ioremapping that,
> takes a struct nvkm_memory object. This seems to work, though since I'm
> not really doing much yet (firmware loading fails, etc.) I wouldn't call
> this "done" just yet.
> 
> In fact there are a few things that I'm not happy about. For example I
> think we'll eventually need to have barriers to ensure that the CPU
> write buffers are flushed, etc. It also seems like most users of the
> FIFO channel object will just go and map its buffer once and then only
> access it via the virtual mapping only, without going through the
> ->rd32()/->wr32() callbacks nor unmapping via ->unmap(). That means we
> effectively don't have a good point where we could emit the memory
> barriers.
> 
> I see two possibilities here: 1) make all accesses go through the
> accessors or 2) guard each series of accesses with a pair of nvkm_map()
> and nvkm_done() calls. Both of those would mean that all code paths need
> to be carefully audited.

Actually it looks like this is already working if I return 0 as the
address from the ->unmap() callback. That seems to result in the
->wr32() and ->rd32() callbacks getting called instead of the callers
trying to directly dereference the address, which obviously they now
can't.

So this seems like it could give me exactly what I need to make this
work. Again, this seems to get me past probe, but I see only a single
write at this point, so that's not saying much.

Thierry

> 
> One other thing I'm wondering is if it's okay to put all of this into
> the gk104_fifo implementation. I think the result of parameterizing on
> device->bar is pretty neat. Also, it seems like most of the rest of the
> code would have to be duplicated, or a lot of the gk104_*() function
> exported to a new implementation. So I'm not sure that it's really worth
> it.
> 
> Thierry
> 
>  .../drm/nouveau/include/nvkm/engine/fifo.h    |   7 +-
>  .../gpu/drm/nouveau/nvkm/engine/fifo/chan.c   | 157 ++++++++++++++++--
>  .../gpu/drm/nouveau/nvkm/engine/fifo/chan.h   |   6 +
>  .../gpu/drm/nouveau/nvkm/engine/fifo/gk104.c  |  29 +++-
>  4 files changed, 180 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h 
> b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
> index 4bd6e1e7c413..c0fb545efb2b 100644
> --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
> +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
> @@ -25,7 +25,12 @@ struct nvkm_fifo_chan {
>       struct nvkm_gpuobj *inst;
>       struct nvkm_gpuobj *push;
>       struct nvkm_vmm *vmm;
> -     void __iomem *user;
> +
> +     union {
> +             struct nvkm_memory *mem;
> +             void __iomem *user;
> +     };
> +
>       u64 addr;
>       u32 size;
>  
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c 
> b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
> index d83485385934..f47bc96bbb6d 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
> @@ -310,7 +310,7 @@ nvkm_fifo_chan_init(struct nvkm_object *object)
>  }
>  
>  static void *
> -nvkm_fifo_chan_dtor(struct nvkm_object *object)
> +__nvkm_fifo_chan_dtor(struct nvkm_object *object)
>  {
>       struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
>       struct nvkm_fifo *fifo = chan->fifo;
> @@ -324,9 +324,6 @@ nvkm_fifo_chan_dtor(struct nvkm_object *object)
>       }
>       spin_unlock_irqrestore(&fifo->lock, flags);
>  
> -     if (chan->user)
> -             iounmap(chan->user);
> -
>       if (chan->vmm) {
>               nvkm_vmm_part(chan->vmm, chan->inst->memory);
>               nvkm_vmm_unref(&chan->vmm);
> @@ -337,6 +334,17 @@ nvkm_fifo_chan_dtor(struct nvkm_object *object)
>       return data;
>  }
>  
> +static void *
> +nvkm_fifo_chan_dtor(struct nvkm_object *object)
> +{
> +     struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +     if (chan->user)
> +             iounmap(chan->user);
> +
> +     return __nvkm_fifo_chan_dtor(object);
> +}
> +
>  static const struct nvkm_object_func
>  nvkm_fifo_chan_func = {
>       .dtor = nvkm_fifo_chan_dtor,
> @@ -349,12 +357,98 @@ nvkm_fifo_chan_func = {
>       .sclass = nvkm_fifo_chan_child_get,
>  };
>  
> +static void *
> +nvkm_fifo_chan_mem_dtor(struct nvkm_object *object)
> +{
> +     return __nvkm_fifo_chan_dtor(object);
> +}
> +
> +static int
> +nvkm_fifo_chan_mem_map(struct nvkm_object *object, void *argv, u32 argc,
> +                    enum nvkm_object_map *type, u64 *addr, u64 *size)
> +{
> +     struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +     pr_info("> %s(object=%px, argv=%px, argc=%u, type=%px, addr=%px, 
> size=%px)\n", __func__, object, argv, argc, type, addr, size);
> +
> +     *type = NVKM_OBJECT_MAP_VA;
> +     *addr = (u64)nvkm_kmap(chan->mem);
> +     *size = chan->size;
> +
> +     pr_info("  type: %d\n", *type);
> +     pr_info("  addr: %016llx\n", *addr);
> +     pr_info("  size: %016llx\n", *size);
> +     pr_info("< %s()\n", __func__);
> +     return 0;
> +}
> +
> +static int
> +nvkm_fifo_chan_mem_unmap(struct nvkm_object *object)
> +{
> +     struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +     pr_info("> %s(object=%px)\n", __func__, object);
> +
> +     nvkm_done(chan->mem);
> +
> +     pr_info("< %s()\n", __func__);
> +     return 0;
> +}
> +
> +static int
> +nvkm_fifo_chan_mem_rd32(struct nvkm_object *object, u64 addr, u32 *data)
> +{
> +     struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +     pr_info("> %s(object=%px, addr=%016llx, data=%px)\n", __func__, object, 
> addr, data);
> +
> +     if (unlikely(addr + 4 > chan->size))
> +             return -EINVAL;
> +
> +     *data = nvkm_ro32(chan->mem, addr);
> +
> +     pr_info("  data: %08x\n", *data);
> +     pr_info("< %s()\n", __func__);
> +     return 0;
> +}
> +
> +static int
> +nvkm_fifo_chan_mem_wr32(struct nvkm_object *object, u64 addr, u32 data)
> +{
> +     struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
> +
> +     pr_info("> %s(object=%px, addr=%016llx, data=%08x)\n", __func__, 
> object, addr, data);
> +
> +     if (unlikely(addr + 4 > chan->size))
> +             return -EINVAL;
> +
> +     nvkm_wo32(chan->mem, addr, data);
> +
> +     /* XXX add barrier */
> +
> +     pr_info("< %s()\n", __func__);
> +     return 0;
> +}
> +
> +static const struct nvkm_object_func
> +nvkm_fifo_chan_mem_func = {
> +     .dtor = nvkm_fifo_chan_mem_dtor,
> +     .init = nvkm_fifo_chan_init,
> +     .fini = nvkm_fifo_chan_fini,
> +     .ntfy = nvkm_fifo_chan_ntfy,
> +     .map = nvkm_fifo_chan_mem_map,
> +     .unmap = nvkm_fifo_chan_mem_unmap,
> +     .rd32 = nvkm_fifo_chan_mem_rd32,
> +     .wr32 = nvkm_fifo_chan_mem_wr32,
> +     .sclass = nvkm_fifo_chan_child_get,
> +};
> +
>  int
> -nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
> -                 struct nvkm_fifo *fifo, u32 size, u32 align, bool zero,
> -                 u64 hvmm, u64 push, u64 engines, int bar, u32 base,
> -                 u32 user, const struct nvkm_oclass *oclass,
> -                 struct nvkm_fifo_chan *chan)
> +__nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
> +                   struct nvkm_fifo *fifo, u32 size, u32 align, bool zero,
> +                   u64 hvmm, u64 push, u64 engines,
> +                   const struct nvkm_oclass *oclass,
> +                   struct nvkm_fifo_chan *chan)
>  {
>       struct nvkm_client *client = oclass->client;
>       struct nvkm_device *device = fifo->engine.subdev.device;
> @@ -362,7 +456,6 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func 
> *func,
>       unsigned long flags;
>       int ret;
>  
> -     nvkm_object_ctor(&nvkm_fifo_chan_func, oclass, &chan->object);
>       chan->func = func;
>       chan->fifo = fifo;
>       chan->engines = engines;
> @@ -412,6 +505,26 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func 
> *func,
>       __set_bit(chan->chid, fifo->mask);
>       spin_unlock_irqrestore(&fifo->lock, flags);
>  
> +     return 0;
> +}
> +
> +int
> +nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
> +                 struct nvkm_fifo *fifo, u32 size, u32 align, bool zero,
> +                 u64 hvmm, u64 push, u64 engines, int bar, u32 base,
> +                 u32 user, const struct nvkm_oclass *oclass,
> +                 struct nvkm_fifo_chan *chan)
> +{
> +     struct nvkm_device *device = fifo->engine.subdev.device;
> +     int ret;
> +
> +     nvkm_object_ctor(&nvkm_fifo_chan_func, oclass, &chan->object);
> +
> +     ret = __nvkm_fifo_chan_ctor(func, fifo, size, align, zero, hvmm, push,
> +                                 engines, oclass, chan);
> +     if (ret)
> +             return ret;
> +
>       /* determine address of this channel's user registers */
>       chan->addr = device->func->resource_addr(device, bar) +
>                    base + user * chan->chid;
> @@ -420,3 +533,27 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func 
> *func,
>       nvkm_fifo_cevent(fifo);
>       return 0;
>  }
> +
> +int nvkm_fifo_chan_mem_ctor(const struct nvkm_fifo_chan_func *func,
> +                         struct nvkm_fifo *fifo, u32 size, u32 align,
> +                         bool zero, u64 hvmm, u64 push, u64 engines,
> +                         struct nvkm_memory *mem, u32 user,
> +                         const struct nvkm_oclass *oclass,
> +                         struct nvkm_fifo_chan *chan)
> +{
> +     int ret;
> +
> +     nvkm_object_ctor(&nvkm_fifo_chan_mem_func, oclass, &chan->object);
> +
> +     ret = __nvkm_fifo_chan_ctor(func, fifo, size, align, zero, hvmm, push,
> +                                 engines, oclass, chan);
> +     if (ret)
> +             return ret;
> +
> +     chan->mem = mem;
> +     chan->addr = user * chan->chid;
> +     chan->size = user;
> +
> +     nvkm_fifo_cevent(fifo);
> +     return 0;
> +}
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h 
> b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
> index 177e10562600..71f32b1ebba0 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
> @@ -24,6 +24,12 @@ int nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func 
> *, struct nvkm_fifo *,
>                       u32 size, u32 align, bool zero, u64 vm, u64 push,
>                       u64 engines, int bar, u32 base, u32 user,
>                       const struct nvkm_oclass *, struct nvkm_fifo_chan *);
> +int nvkm_fifo_chan_mem_ctor(const struct nvkm_fifo_chan_func *,
> +                         struct nvkm_fifo *, u32 size, u32 align,
> +                         bool zero, u64 vm, u64 push, u64 engines,
> +                         struct nvkm_memory *mem, u32 user,
> +                         const struct nvkm_oclass *,
> +                         struct nvkm_fifo_chan *);
>  
>  struct nvkm_fifo_chan_oclass {
>       int (*ctor)(struct nvkm_fifo *, const struct nvkm_oclass *,
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c 
> b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
> index 81cbe1cc4804..5404a182eb0a 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
> @@ -906,7 +906,6 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
>       struct gk104_fifo *fifo = gk104_fifo(base);
>       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
>       struct nvkm_device *device = subdev->device;
> -     struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device);
>       int engn, runl, pbid, ret, i, j;
>       enum nvkm_devidx engidx;
>       u32 *map;
> @@ -967,12 +966,19 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
>       if (ret)
>               return ret;
>  
> -     ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem),
> -                        &fifo->user.bar);
> -     if (ret)
> -             return ret;
> +     if (device->bar) {
> +             struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device);
> +
> +             ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem),
> +                                &fifo->user.bar);
> +             if (ret)
> +                     return ret;
> +
> +             return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar,
> +                                    NULL, 0);
> +     }
>  
> -     return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0);
> +     return 0;
>  }
>  
>  static void
> @@ -998,7 +1004,12 @@ gk104_fifo_init(struct nvkm_fifo *base)
>               nvkm_wr32(device, 0x04014c + (i * 0x2000), 0xffffffff); /* 
> INTREN */
>       }
>  
> -     nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12);
> +     /* obsolete on GV100 and later */
> +     if (fifo->user.bar) {
> +             u32 value = 0x10000000 | fifo->user.bar->addr >> 12;
> +
> +             nvkm_wr32(device, 0x002254, value);
> +     }
>  
>       if (fifo->func->pbdma->init_timeout)
>               fifo->func->pbdma->init_timeout(fifo);
> @@ -1014,7 +1025,9 @@ gk104_fifo_dtor(struct nvkm_fifo *base)
>       struct nvkm_device *device = fifo->base.engine.subdev.device;
>       int i;
>  
> -     nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar);
> +     if (fifo->user.bar)
> +             nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar);
> +
>       nvkm_memory_unref(&fifo->user.mem);
>  
>       for (i = 0; i < fifo->runlist_nr; i++) {
> -- 
> 2.23.0
> 

Attachment: signature.asc
Description: PGP signature

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to