DRM/GEM uses uvm_aobj for long-term pageable graphics buffers, but when these buffers are assigned physical pages whose addresses can be programmed into the GPU's page tables, only certain physical pages are allowed -- specifically, Intel GPUs can handle only 32-bit, 36-bit, or 40-bit physical addresses, depending on the model. Normally we use bus_dmamem_alloc and bus_dmatag_subregion to impose these constraints, but bus_dmamem memory is not pageable.
When I wrote the code to hook GEM objects up to uvm_aobj last summer I kinda quietly hoped this wouldn't be a problem, but it turns out this is a problem in practice. The attached patch (a) implements a uvm page allocation strategy UVM_PGA_STRAT_LIMITED which lets the caller specify low and high addresses, for which uvm_pagealloc defers to uvm_pglistalloc; (b) rearranges locking in uvm_pglistalloc a little so this works; (c) adds a uao_limit_paddr(uao, low, high) to let a uao client specify bounds on the allowed physical addresses; and (d) uses uao_limit_paddr in i915drmkms. It doesn't change page allocation in any other case: uao still uses the normal page allocation strategy if you don't call uao_limit_paddr, and other calls to uvm_pagealloc are not affected. Comments? Objections? Lewd Spenserian sonnets? This was the least invasive surgery I could think of to accomplish the task. One might consider a bus_dma_constrain_uvm_object(dmat, uobj) which would set a parameter in uobj so that uvm_pagealloc for it would limit itself to the constraints imposed by the bus_dma tag dmat. However, that would require new MD code, add overhead to every UVM object, and probably serve no more purpose than the attached patch. But I'm willing to hear alternative approaches too -- I make no claim that the way I did it is optimal.
Index: sys/external/bsd/drm2/dist/drm/i915/i915_gem.c =================================================================== RCS file: /cvsroot/src/sys/external/bsd/drm2/dist/drm/i915/i915_gem.c,v retrieving revision 1.9 diff -p -u -r1.9 i915_gem.c --- sys/external/bsd/drm2/dist/drm/i915/i915_gem.c 20 May 2014 15:15:04 -0000 1.9 +++ sys/external/bsd/drm2/dist/drm/i915/i915_gem.c 20 May 2014 20:16:58 -0000 @@ -4193,10 +4193,13 @@ struct drm_i915_gem_object *i915_gem_all size_t size) { struct drm_i915_gem_object *obj; -#ifndef __NetBSD__ /* XXX >32bit dma? */ +#ifdef __linux__ struct address_space *mapping; u32 mask; #endif +#ifdef __NetBSD__ + paddr_t high; +#endif obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (obj == NULL) @@ -4207,7 +4210,7 @@ struct drm_i915_gem_object *i915_gem_all return NULL; } -#ifndef __NetBSD__ /* XXX >32bit dma? */ +#ifdef __linux__ mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { /* 965gm cannot relocate objects above 4GiB. */ @@ -4218,6 +4221,17 @@ struct drm_i915_gem_object *i915_gem_all mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; mapping_set_gfp_mask(mapping, mask); #endif +#ifdef __NetBSD__ + /* + * 965GM can't handle >32-bit paddrs; all other models can't + * handle >40-bit paddrs. 
+ */ + if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) + high = 0xffffffffULL; + else + high = 0xffffffffffULL; + uao_limit_paddr(obj->base.gemo_shm_uao, 0, high); +#endif i915_gem_object_init(obj, &i915_gem_object_ops); Index: sys/uvm/uvm_aobj.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_aobj.c,v retrieving revision 1.120 diff -p -u -r1.120 uvm_aobj.c --- sys/uvm/uvm_aobj.c 25 Oct 2013 20:22:55 -0000 1.120 +++ sys/uvm/uvm_aobj.c 20 May 2014 20:16:58 -0000 @@ -146,6 +146,8 @@ struct uvm_aobj { struct uao_swhash *u_swhash; u_long u_swhashmask; /* mask for hashtable */ LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */ + paddr_t u_paddr_low; /* optional physical address constraints */ + paddr_t u_paddr_high; }; static void uao_free(struct uvm_aobj *); @@ -161,6 +163,8 @@ static bool uao_pagein(struct uvm_aobj * static bool uao_pagein_page(struct uvm_aobj *, int); #endif /* defined(VMSWAP) */ +static struct vm_page *uao_pagealloc(struct uvm_object *, voff_t, int); + /* * aobj_pager * @@ -436,6 +440,13 @@ uao_create(vsize_t size, int flags) } /* + * default physical address range covers all possible addresses + */ + + aobj->u_paddr_low = 0; + aobj->u_paddr_high = ~(paddr_t)0; + + /* * allocate hash/array if necessary * * note: in the KERNSWAP case no need to worry about locking since @@ -490,6 +501,38 @@ uao_create(vsize_t size, int flags) } /* + * uao_limit_paddr: limit pages to lie within a paddr range + * + * => must be called before any pages are allocated for the object. + */ + +void +uao_limit_paddr(struct uvm_object *uobj, paddr_t low, paddr_t high) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + + aobj->u_paddr_low = MAX(low, aobj->u_paddr_low); + aobj->u_paddr_high = MIN(high, aobj->u_paddr_high); +} + +/* + * uao_pagealloc: allocate a page for aobj. 
+ */ + +static inline struct vm_page * +uao_pagealloc(struct uvm_object *uobj, voff_t offset, int flags) +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + + if (__predict_true(aobj->u_paddr_low == 0) && + __predict_true(aobj->u_paddr_high == ~(paddr_t)0)) + return uvm_pagealloc(uobj, offset, NULL, flags); + else + return uvm_pagealloc_limited(uobj, offset, NULL, flags, + aobj->u_paddr_low, aobj->u_paddr_high); +} + +/* * uao_init: set up aobj pager subsystem * * => called at boot time from uvm_pager_init() @@ -864,8 +907,8 @@ uao_get(struct uvm_object *uobj, voff_t if (ptmp == NULL && uao_find_swslot(uobj, current_offset >> PAGE_SHIFT) == 0) { - ptmp = uvm_pagealloc(uobj, current_offset, - NULL, UVM_FLAG_COLORMATCH|UVM_PGA_ZERO); + ptmp = uao_pagealloc(uobj, current_offset, + UVM_FLAG_COLORMATCH|UVM_PGA_ZERO); if (ptmp) { /* new page */ ptmp->flags &= ~(PG_FAKE); @@ -959,8 +1002,7 @@ gotpage: /* not resident? allocate one now (if we can) */ if (ptmp == NULL) { - ptmp = uvm_pagealloc(uobj, current_offset, - NULL, 0); + ptmp = uao_pagealloc(uobj, current_offset, 0); /* out of RAM? 
*/ if (ptmp == NULL) { Index: sys/uvm/uvm_extern.h =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_extern.h,v retrieving revision 1.189 diff -p -u -r1.189 uvm_extern.h --- sys/uvm/uvm_extern.h 21 Feb 2014 22:08:07 -0000 1.189 +++ sys/uvm/uvm_extern.h 20 May 2014 20:16:58 -0000 @@ -165,6 +165,7 @@ #define UVM_PGA_STRAT_NORMAL 0 /* priority (low id to high) walk */ #define UVM_PGA_STRAT_ONLY 1 /* only specified free list */ #define UVM_PGA_STRAT_FALLBACK 2 /* ONLY falls back on NORMAL */ +#define UVM_PGA_STRAT_LIMITED 3 /* limited physical address range */ /* * flags for uvm_pagealloc_strat() @@ -539,5 +540,6 @@ void vunmapbuf(struct buf *, vsize_t); /* uvm_aobj.c */ struct uvm_object *uao_create(vsize_t, int); +void uao_limit_paddr(struct uvm_object *, paddr_t, paddr_t); void uao_detach(struct uvm_object *); void uao_reference(struct uvm_object *); @@ -686,8 +689,15 @@ int uvm_obj_wirepages(struct uvm_objec void uvm_obj_unwirepages(struct uvm_object *, off_t, off_t); /* uvm_page.c */ -struct vm_page *uvm_pagealloc_strat(struct uvm_object *, - voff_t, struct vm_anon *, int, int, int); +struct vm_page *uvm_pagealloc_strat_limited(struct uvm_object *, + voff_t, struct vm_anon *, int, int, int, + paddr_t, paddr_t); +#define uvm_pagealloc_limited(obj, off, anon, flags, low, high) \ + uvm_pagealloc_strat_limited((obj), (off), (anon), (flags), \ + UVM_PGA_STRAT_LIMITED, 0, (low), (high)) +#define uvm_pagealloc_strat(obj, off, anon, flags, strat, freelist) \ + uvm_pagealloc_strat_limited((obj), (off), (anon), (flags), \ + (strat), (freelist), 0, 0) #define uvm_pagealloc(obj, off, anon, flags) \ uvm_pagealloc_strat((obj), (off), (anon), (flags), \ UVM_PGA_STRAT_NORMAL, 0) @@ -717,6 +727,8 @@ void uvm_estimatepageable(int *, int * /* uvm_pglist.c */ int uvm_pglistalloc(psize_t, paddr_t, paddr_t, paddr_t, paddr_t, struct pglist *, int, int); +int uvm_pglistalloc_locked(psize_t, paddr_t, paddr_t, + paddr_t, paddr_t, struct 
pglist *, int, int); void uvm_pglistfree(struct pglist *); /* uvm_swap.c */ Index: sys/uvm/uvm_page.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_page.c,v retrieving revision 1.184 diff -p -u -r1.184 uvm_page.c --- sys/uvm/uvm_page.c 21 Apr 2014 16:33:48 -0000 1.184 +++ sys/uvm/uvm_page.c 20 May 2014 20:16:58 -0000 @@ -1210,8 +1210,9 @@ uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, */ struct vm_page * -uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, - int flags, int strat, int free_list) +uvm_pagealloc_strat_limited(struct uvm_object *obj, voff_t off, + struct vm_anon *anon, int flags, int strat, int free_list, + paddr_t low, paddr_t high) { int lcv, try1, try2, zeroit = 0, color; struct uvm_cpu *ucpu; @@ -1314,6 +1315,25 @@ uvm_pagealloc_strat(struct uvm_object *o /* No pages free! */ goto fail; + case UVM_PGA_STRAT_LIMITED: { + struct pglist list; + int error; + + /* XXX Request zero/nonzero page, request/require colour... 
*/ + error = uvm_pglistalloc_locked(PAGE_SIZE, low, high, PAGE_SIZE, + 0, &list, 1, 0 /*!waitok*/); + if (error) + goto fail; + + KASSERT(!TAILQ_EMPTY(&list)); + pg = TAILQ_FIRST(&list); + KASSERT(TAILQ_NEXT(pg, pageq.queue) == NULL); + + color = VM_PGCOLOR_BUCKET(pg); + + goto gotit; + } + default: panic("uvm_pagealloc_strat: bad strat %d", strat); /* NOTREACHED */ Index: sys/uvm/uvm_pglist.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pglist.c,v retrieving revision 1.65 diff -p -u -r1.65 uvm_pglist.c --- sys/uvm/uvm_pglist.c 19 May 2014 05:48:14 -0000 1.65 +++ sys/uvm/uvm_pglist.c 20 May 2014 20:16:58 -0000 @@ -43,6 +43,8 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c #include <uvm/uvm.h> #include <uvm/uvm_pdpolicy.h> +static void uvm_pglistfree_locked(struct pglist *); + #ifdef VM_PAGE_ALLOC_MEMORY_STATS #define STAT_INCR(v) (v)++ #define STAT_DECR(v) do { \ @@ -308,14 +310,11 @@ uvm_pglistalloc_contig(int num, paddr_t struct vm_physseg *ps; int error; + KASSERT(mutex_owned(&uvm_fpageqlock)); + /* Default to "lose". */ error = ENOMEM; - /* - * Block all memory allocation and lock the free list. - */ - mutex_spin_enter(&uvm_fpageqlock); - /* Are there even any free pages? */ if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) goto out; @@ -353,7 +352,6 @@ out: */ uvm_kick_pdaemon(); - mutex_spin_exit(&uvm_fpageqlock); return (error); } @@ -446,15 +444,12 @@ uvm_pglistalloc_simple(int num, paddr_t int fl, psi, error; struct vm_physseg *ps; + KASSERT(mutex_owned(&uvm_fpageqlock)); + /* Default to "lose". */ error = ENOMEM; again: - /* - * Block all memory allocation and lock the free list. - */ - mutex_spin_enter(&uvm_fpageqlock); - /* Are there even any free pages? 
*/ if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) goto out; @@ -487,7 +482,6 @@ out: */ uvm_kick_pdaemon(); - mutex_spin_exit(&uvm_fpageqlock); if (error) { if (waitok) { @@ -495,11 +489,15 @@ out: #ifdef DEBUG printf("pglistalloc waiting\n"); #endif + mutex_spin_exit(&uvm_fpageqlock); uvm_wait("pglalloc"); + mutex_spin_enter(&uvm_fpageqlock); goto again; - } else - uvm_pglistfree(rlist); + } else { + uvm_pglistfree_locked(rlist); + } } + KASSERT(mutex_owned(&uvm_fpageqlock)); #ifdef PGALLOC_VERBOSE if (!error) printf("pgalloc: %"PRIxMAX"..%"PRIxMAX"\n", @@ -510,11 +508,13 @@ out: } int -uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, - paddr_t boundary, struct pglist *rlist, int nsegs, int waitok) +uvm_pglistalloc_locked(psize_t size, paddr_t low, paddr_t high, + paddr_t alignment, paddr_t boundary, struct pglist *rlist, int nsegs, + int waitok) { int num, res; + KASSERT(mutex_owned(&uvm_fpageqlock)); KASSERT((alignment & (alignment - 1)) == 0); KASSERT((boundary & (boundary - 1)) == 0); @@ -541,6 +541,23 @@ uvm_pglistalloc(psize_t size, paddr_t lo return (res); } +int +uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, + paddr_t boundary, struct pglist *rlist, int nsegs, int waitok) +{ + int error; + + /* + * Block all memory allocation and lock the free list. 
+ */ + mutex_spin_enter(&uvm_fpageqlock); + error = uvm_pglistalloc_locked(size, low, high, alignment, boundary, + rlist, nsegs, waitok); + mutex_spin_exit(&uvm_fpageqlock); + + return error; +} + /* * uvm_pglistfree: free a list of pages * @@ -550,16 +567,22 @@ uvm_pglistalloc(psize_t size, paddr_t lo void uvm_pglistfree(struct pglist *list) { + + mutex_spin_enter(&uvm_fpageqlock); + uvm_pglistfree_locked(list); + mutex_spin_exit(&uvm_fpageqlock); +} + +static void +uvm_pglistfree_locked(struct pglist *list) +{ struct uvm_cpu *ucpu; struct vm_page *pg; int index, color, queue; bool iszero; - /* - * Lock the free list and free each page. - */ + KASSERT(mutex_owned(&uvm_fpageqlock)); - mutex_spin_enter(&uvm_fpageqlock); ucpu = curcpu()->ci_data.cpu_uvm; while ((pg = TAILQ_FIRST(list)) != NULL) { KASSERT(!uvmpdpol_pageisqueued_p(pg)); @@ -590,5 +613,4 @@ uvm_pglistfree(struct pglist *list) } if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) ucpu->page_idle_zero = vm_page_zero_enable; - mutex_spin_exit(&uvm_fpageqlock); }