On Monday, May 18, 2026 5:02 AM Aaron Kling wrote: > On Thu, May 14, 2026 at 9:35 PM Mikko Perttunen <[email protected]> wrote: > > > > When a buffer object is pinned via host1x_bo_pin() with a cache, the > > resulting mapping is kept in the cache so it can be reused on subsequent > > pins. Each mapping held a reference to the underlying host1x_bo (taken > > in tegra_bo_pin / gather_bo_pin), so as long as a mapping was cached, > > the bo itself could not be freed. > > > > However, the only way to remove the cached mapping was through the free > > path of the buffer object. This meant that if a bo got cached, it could > > never get freed again. > > > > Resolve the circularity by holding a weak reference to the bo from the > > cache side. This is done by having the .pin callbacks not bump the bo's > > refcount -- instead the common Host1x bo code does so, except for the > > cache reference. > > > > Also move the remove-cache-mapping-on-free code into a common function > > inside Host1x code. This is only called from the TegraDRM GEM buffers > > since those are the only ones that can be cached at the moment. > > > > Reported-by: Aaron Kling <[email protected]> > > Fixes: 1f39b1dfa53c ("drm/tegra: Implement buffer object cache") > > Signed-off-by: Mikko Perttunen <[email protected]> > > --- > > drivers/gpu/drm/tegra/gem.c | 13 ++------- > > drivers/gpu/drm/tegra/submit.c | 3 +-- > > drivers/gpu/host1x/bus.c | 60 > > +++++++++++++++++++++++++++++++++++++++++- > > include/linux/host1x.h | 7 +++++ > > 4 files changed, 69 insertions(+), 14 deletions(-) > > > > diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c > > index d2bae88ad545..2377e2b76397 100644 > > --- a/drivers/gpu/drm/tegra/gem.c > > +++ b/drivers/gpu/drm/tegra/gem.c > > @@ -69,7 +69,7 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct > > device *dev, struct host1x_ > > return ERR_PTR(-ENOMEM); > > > > kref_init(&map->ref); > > - map->bo = host1x_bo_get(bo); > > + map->bo = bo; > > map->direction = direction; > > map->dev = dev; > > > > @@ -170,7 +170,6 @@ static void tegra_bo_unpin(struct host1x_bo_mapping > > *map) > > kfree(map->sgt); > > } > > > > - host1x_bo_put(map->bo); > > kfree(map); > > } > > > > @@ -509,17 +508,9 @@ static struct tegra_bo *tegra_bo_import(struct > > drm_device *drm, > > void tegra_bo_free_object(struct drm_gem_object *gem) > > { > > struct tegra_drm *tegra = gem->dev->dev_private; > > - struct host1x_bo_mapping *mapping, *tmp; > > struct tegra_bo *bo = to_tegra_bo(gem); > > > > - /* remove all mappings of this buffer object from any caches */ > > - list_for_each_entry_safe(mapping, tmp, &bo->base.mappings, list) { > > - if (mapping->cache) > > - host1x_bo_unpin(mapping); > > - else > > - dev_err(gem->dev->dev, "mapping %p stale for device > > %s\n", mapping, > > - dev_name(mapping->dev)); > > - } > > + host1x_bo_clear_cached_mappings(&bo->base); > > > > if (tegra->domain) { > > tegra_bo_iommu_unmap(tegra, bo); > > diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c > > index 3009b8b9e619..e5841857c937 100644 > > --- a/drivers/gpu/drm/tegra/submit.c > > +++ b/drivers/gpu/drm/tegra/submit.c > > @@ -76,7 +76,7 @@ gather_bo_pin(struct device *dev, struct host1x_bo *bo, > > enum dma_data_direction > > return ERR_PTR(-ENOMEM); > > > > kref_init(&map->ref); > > - map->bo = host1x_bo_get(bo); > > + map->bo = bo; > > map->direction = direction; > > map->dev = dev; > > > > @@ -117,7 +117,6 @@ static void gather_bo_unpin(struct host1x_bo_mapping > > *map) > > dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); > > sg_free_table(map->sgt); > > kfree(map->sgt); > > - host1x_bo_put(map->bo); > > > > kfree(map); > > } > > diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c > > index f814eb4941c0..772e05a7b45b 100644 > > --- a/drivers/gpu/host1x/bus.c > > +++ b/drivers/gpu/host1x/bus.c > > @@ -887,6 +887,20 @@ int host1x_client_resume(struct host1x_client *client) > > } > > EXPORT_SYMBOL(host1x_client_resume); > > > > +/** > > + * host1x_bo_pin() - Create a DMA mapping for the buffer object > > + * @dev: Device onto which DMA map to > > + * @bo: Buffer object to map > > + * @dir: DMA direction > > + * @cache: Cache in which to store mapping, or NULL > > + * > > + * Creates a DMA mapping pointing to @bo for @dev. The refcount of @bo is > > incremented > > + * until host1x_bo_unpin is called. > > + * > > + * If @cache is specified, the mapping is also stored in the cache and not > > released > > + * until @bo is freed (refcount drops to zero). This improves performance > > when a buffer > > + * is pinned and unpinned frequently as in the case of display use. > > + */ > > struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct > > host1x_bo *bo, > > enum dma_data_direction dir, > > struct host1x_bo_cache *cache) > > @@ -899,6 +913,7 @@ struct host1x_bo_mapping *host1x_bo_pin(struct device > > *dev, struct host1x_bo *bo > > list_for_each_entry(mapping, &cache->mappings, entry) { > > if (mapping->bo == bo && mapping->direction == dir) > > { > > kref_get(&mapping->ref); > > + host1x_bo_get(bo); > > goto unlock; > > } > > } > > @@ -908,6 +923,8 @@ struct host1x_bo_mapping *host1x_bo_pin(struct device > > *dev, struct host1x_bo *bo > > if (IS_ERR(mapping)) > > goto unlock; > > > > + host1x_bo_get(bo); > > + > > spin_lock(&mapping->bo->lock); > > list_add_tail(&mapping->list, &bo->mappings); > > spin_unlock(&mapping->bo->lock); > > @@ -918,7 +935,12 @@ struct host1x_bo_mapping *host1x_bo_pin(struct device > > *dev, struct host1x_bo *bo > > > > list_add_tail(&mapping->entry, &cache->mappings); > > > > - /* bump reference count to track the copy in the cache */ > > + /* > > + * Bump the mapping reference count to track the mapping in > > the cache, > > + * but do not bump the BO's refcount. This allows the BO to > > still get freed, > > + * triggering the release of the cache mapping through > > + * host1x_bo_clear_cached_mappings. > > + */ > > kref_get(&mapping->ref); > > } > > > > @@ -948,9 +970,17 @@ static void __host1x_bo_unpin(struct kref *ref) > > mapping->bo->ops->unpin(mapping); > > } > > > > +/** > > + * host1x_bo_unpin() - Release an established DMA mapping of a buffer > > object > > + * @mapping: Mapping to release > > + * > > + * Unmaps the given @mapping, unless it is cached. Decreases the refcount > > on > > + * the underlying buffer object. > > + */ > > void host1x_bo_unpin(struct host1x_bo_mapping *mapping) > > { > > struct host1x_bo_cache *cache = mapping->cache; > > + struct host1x_bo *bo = mapping->bo; > > > > if (cache) > > mutex_lock(&cache->lock); > > @@ -959,5 +989,33 @@ void host1x_bo_unpin(struct host1x_bo_mapping *mapping) > > > > if (cache) > > mutex_unlock(&cache->lock); > > + > > + host1x_bo_put(bo); > > } > > EXPORT_SYMBOL(host1x_bo_unpin); > > + > > +/** > > + * host1x_bo_clear_cached_mappings() - Remove all cached mappings pointing > > at a bo > > + * @bo: Buffer object to release mappings of > > + * > > + * Drops references to any mappings pointing to @bo left in any caches. > > This must > > + * be called by any host1x_bo implementers that may be pinned with caching > > enabled > > + * before freeing the bo. > > + */ > > +void host1x_bo_clear_cached_mappings(struct host1x_bo *bo) > > +{ > > + struct host1x_bo_mapping *mapping, *tmp; > > + struct host1x_bo_cache *cache; > > + > > + list_for_each_entry_safe(mapping, tmp, &bo->mappings, list) { > > + cache = mapping->cache; > > + if (WARN_ON(!cache)) > > + continue; > > + > > + mutex_lock(&mapping->cache->lock); > > + WARN_ON(kref_read(&mapping->ref) != 1); > > + __host1x_bo_unpin(&mapping->ref); > > + mutex_unlock(&mapping->cache->lock); > > + } > > +} > > +EXPORT_SYMBOL(host1x_bo_clear_cached_mappings); > > diff --git a/include/linux/host1x.h b/include/linux/host1x.h > > index 5e7a63143a4a..d8f052a85b75 100644 > > --- a/include/linux/host1x.h > > +++ b/include/linux/host1x.h > > @@ -143,6 +143,12 @@ static inline struct host1x_bo_mapping > > *to_host1x_bo_mapping(struct kref *ref) > > return container_of(ref, struct host1x_bo_mapping, ref); > > } > > > > +/** > > + * struct host1x_bo_ops - operations implemented by a host1x_bo provider > > + * > > + * @pin: create a DMA mapping. Implementation must not touch the bo's > > refcount. > > + * @unpin: destroy a DMA mapping. Implementation must not touch the bo's > > refcount. > > + */ > > struct host1x_bo_ops { > > struct host1x_bo *(*get)(struct host1x_bo *bo); > > void (*put)(struct host1x_bo *bo); > > @@ -181,6 +187,7 @@ struct host1x_bo_mapping *host1x_bo_pin(struct device > > *dev, struct host1x_bo *bo > > enum dma_data_direction dir, > > struct host1x_bo_cache *cache); > > void host1x_bo_unpin(struct host1x_bo_mapping *map); > > +void host1x_bo_clear_cached_mappings(struct host1x_bo *bo); > > > > static inline void *host1x_bo_mmap(struct host1x_bo *bo) > > { > > > > -- > > 2.53.0 > > > > I have verified this on a Jetson AGX Xavier devkit, a Jetson Xavier NX > devkit, a Jetson TX2 devkit, and a Jetson TX2 NX in a p3509 carrier. > No longer seeing allocation failures nor am I seeing any obvious > regressions. I am currently unable to boot any t210 device to Android > userspace to verify those for various preexisting reasons, and Android > recovery does not reallocate buffers to stress this issue. > > Tested-by: Aaron Kling <[email protected]> > > Aaron
Thank you!
