Hi, I've spent some time testing GPUs on aarch64 and fixing bugs. Attached are some patches to make things more usable.
I would like to commit these, or get feedback on whether the changes should be implemented differently. Since I touched things in both MD code and UVM, review would be appreciated. The first line of each patch contains a summary of what it does.

amdgpu (bonaire C1K) on AADK: works[1][2]
    glamor, picom, alacritty, doomlegacy OK
radeon (bonaire) on AADK: works[1], ditto
radeon (cedar) on LX2K: works, minor rendering glitches

[1] The AADK firmware doesn't POST the GPU, and VGA BIOS extraction fails. You must provide a VGA BIOS image dump manually, build a custom EDK2 with VGA POSTing capability, or figure out how to map the ROM.
[2] /libdata/firmware/amdgpu is not shipped. (Can we include these in the sets?)

Kind regards,
-Tobias
This patch set adds a new UVM freelist on aarch64 to manage pages allocated to GPU buffers, which have a constraint on paddr < 40 bits. Index: sys/arch/aarch64/include/vmparam.h =================================================================== RCS file: /cvsroot/src/sys/arch/aarch64/include/vmparam.h,v retrieving revision 1.20 diff -p -u -r1.20 vmparam.h --- sys/arch/aarch64/include/vmparam.h 16 Apr 2023 14:01:51 -0000 1.20 +++ sys/arch/aarch64/include/vmparam.h 27 Dec 2023 22:07:38 -0000 @@ -182,8 +182,9 @@ #define VM_PHYSSEG_MAX 64 /* XXX */ #define VM_PHYSSEG_STRAT VM_PSTRAT_BSEARCH -#define VM_NFREELIST 1 +#define VM_NFREELIST 2 #define VM_FREELIST_DEFAULT 0 +#define VM_FREELIST_FIRST1T 1 #elif defined(__arm__) Index: sys/arch/evbarm/fdt/fdt_machdep.c =================================================================== RCS file: /cvsroot/src/sys/arch/evbarm/fdt/fdt_machdep.c,v retrieving revision 1.106 diff -p -u -r1.106 fdt_machdep.c --- sys/arch/evbarm/fdt/fdt_machdep.c 4 Aug 2023 09:06:33 -0000 1.106 +++ sys/arch/evbarm/fdt/fdt_machdep.c 27 Dec 2023 22:07:38 -0000 @@ -194,6 +194,11 @@ fdt_add_boot_physmem(const struct fdt_me bp->bp_start = atop(saddr); bp->bp_pages = atop(eaddr) - bp->bp_start; bp->bp_freelist = VM_FREELIST_DEFAULT; +#ifdef _LP64 + if (eaddr < (1UL<<40)) { + bp->bp_freelist = VM_FREELIST_FIRST1T; + } +#endif #ifdef PMAP_NEED_ALLOC_POOLPAGE const uint64_t memory_size = *(uint64_t *)arg; Index: sys/external/bsd/drm2/include/drm/bus_dma_hacks.h =================================================================== RCS file: /cvsroot/src/sys/external/bsd/drm2/include/drm/bus_dma_hacks.h,v retrieving revision 1.25 diff -p -u -r1.25 bus_dma_hacks.h --- sys/external/bsd/drm2/include/drm/bus_dma_hacks.h 19 Jul 2022 23:19:44 -0000 1.25 +++ sys/external/bsd/drm2/include/drm/bus_dma_hacks.h 27 Dec 2023 22:07:38 -0000 @@ -78,7 +78,7 @@ BUS_MEM_TO_PHYS(bus_dma_tag_t dmat, bus_ if (dr->dr_busbase <= ba && ba - dr->dr_busbase <= dr->dr_len) return ba - 
dr->dr_busbase + dr->dr_sysbase; } - panic("bus addr has no bus address in dma tag %p: %"PRIxPADDR, dmat, + panic("bus addr has no paddr in dma tag %p: %"PRIxPADDR, dmat, ba); } #elif defined(__sparc__) || defined(__sparc64__) @@ -99,6 +99,8 @@ bus_dmamem_pgfl(bus_dma_tag_t tag) { #if defined(__i386__) || defined(__x86_64__) return x86_select_freelist(tag->_bounce_alloc_hi - 1); +#elif defined(__aarch64__) + return VM_FREELIST_FIRST1T; #else return VM_FREELIST_DEFAULT; #endif
This patch set changes the type of uvm_physseg.start_hint from u_int to u_long. Index: sys/uvm/uvm_pglist.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pglist.c,v retrieving revision 1.90 diff -p -u -r1.90 uvm_pglist.c --- sys/uvm/uvm_pglist.c 21 Dec 2021 08:27:49 -0000 1.90 +++ sys/uvm/uvm_pglist.c 27 Dec 2023 22:06:10 -0000 @@ -112,8 +112,9 @@ static int uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high, paddr_t alignment, paddr_t boundary, struct pglist *rlist) { - signed int candidate, limit, candidateidx, end, idx, skip; - int pagemask; + long candidate, limit, candidateidx, end, idx; + int skip; + long pagemask; bool second_pass; #ifdef DEBUG paddr_t idxpa, lastidxpa; @@ -138,9 +139,9 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, * succeeded. */ alignment = atop(alignment); - candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi) + + candidate = roundup2(ulmax(low, uvm_physseg_get_avail_start(psi) + uvm_physseg_get_start_hint(psi)), alignment); - limit = uimin(high, uvm_physseg_get_avail_end(psi)); + limit = ulmin(high, uvm_physseg_get_avail_end(psi)); pagemask = ~((boundary >> PAGE_SHIFT) - 1); skip = 0; second_pass = false; @@ -162,8 +163,8 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, * is were we started. */ second_pass = true; - candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi)), alignment); - limit = uimin(limit, uvm_physseg_get_avail_start(psi) + + candidate = roundup2(ulmax(low, uvm_physseg_get_avail_start(psi)), alignment); + limit = ulmin(limit, uvm_physseg_get_avail_start(psi) + uvm_physseg_get_start_hint(psi)); skip = 0; continue; @@ -200,7 +201,7 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, * Found a suitable starting page. See if the range is free. 
*/ #ifdef PGALLOC_VERBOSE - printf("%s: psi=%d candidate=%#x end=%#x skip=%#x, align=%#"PRIxPADDR, + printf("%s: psi=%d candidate=%#lx end=%#lx skip=%#x, align=%#"PRIxPADDR, __func__, psi, candidateidx, end, skip, alignment); #endif /* @@ -283,7 +284,7 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, uvm_physseg_get_avail_start(psi)); KASSERTMSG(uvm_physseg_get_start_hint(psi) <= uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi), - "%x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")", + "%lx %lu (%#lx) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")", candidate + num, uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi), uvm_physseg_get_avail_end(psi), uvm_physseg_get_avail_start(psi), @@ -523,7 +524,8 @@ static int uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high, struct pglist *rlist) { - int todo, limit, candidate; + int todo; + long limit, candidate; struct vm_page *pg; bool second_pass; #ifdef PGALLOC_VERBOSE @@ -546,9 +548,9 @@ uvm_pglistalloc_s_ps(uvm_physseg_t psi, return -1; todo = num; - candidate = uimax(low, uvm_physseg_get_avail_start(psi) + + candidate = ulmax(low, uvm_physseg_get_avail_start(psi) + uvm_physseg_get_start_hint(psi)); - limit = uimin(high, uvm_physseg_get_avail_end(psi)); + limit = ulmin(high, uvm_physseg_get_avail_end(psi)); pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi)); second_pass = false; @@ -560,8 +562,8 @@ again: break; } second_pass = true; - candidate = uimax(low, uvm_physseg_get_avail_start(psi)); - limit = uimin(limit, uvm_physseg_get_avail_start(psi) + + candidate = ulmax(low, uvm_physseg_get_avail_start(psi)); + limit = ulmin(limit, uvm_physseg_get_avail_start(psi) + uvm_physseg_get_start_hint(psi)); pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi)); goto again; @@ -571,10 +573,10 @@ again: paddr_t cidx = 0; const uvm_physseg_t bank = uvm_physseg_find(candidate, &cidx); KDASSERTMSG(bank == psi, - "uvm_physseg_find(%#x) 
(%"PRIxPHYSSEG ") != psi %"PRIxPHYSSEG, + "uvm_physseg_find(%#lx) (%"PRIxPHYSSEG ") != psi %"PRIxPHYSSEG, candidate, bank, psi); KDASSERTMSG(cidx == candidate - uvm_physseg_get_start(psi), - "uvm_physseg_find(%#x): %#"PRIxPADDR" != off %"PRIxPADDR, + "uvm_physseg_find(%#lx): %#"PRIxPADDR" != off %"PRIxPADDR, candidate, cidx, candidate - uvm_physseg_get_start(psi)); } #endif @@ -594,7 +596,7 @@ again: uvm_physseg_set_start_hint(psi, candidate + 1 - uvm_physseg_get_avail_start(psi)); KASSERTMSG(uvm_physseg_get_start_hint(psi) <= uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi), - "%#x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")", + "%#lx %lu (%#lx) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")", candidate + 1, uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi), Index: sys/uvm/uvm_physseg.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_physseg.c,v retrieving revision 1.19 diff -p -u -r1.19 uvm_physseg.c --- sys/uvm/uvm_physseg.c 23 Sep 2023 18:20:20 -0000 1.19 +++ sys/uvm/uvm_physseg.c 27 Dec 2023 22:06:10 -0000 @@ -100,7 +100,7 @@ struct uvm_physseg { paddr_t avail_end; /* (PF# of last free page in segment) +1 */ struct extent *ext; /* extent(9) structure to manage pgs[] */ int free_list; /* which free list they belong on */ - u_int start_hint; /* start looking for free pages here */ + u_long start_hint; /* start looking for free pages here */ #ifdef __HAVE_PMAP_PHYSSEG struct pmap_physseg pmseg; /* pmap specific (MD) data */ #endif @@ -1108,7 +1108,7 @@ uvm_physseg_get_free_list(uvm_physseg_t return HANDLE_TO_PHYSSEG_NODE(upm)->free_list; } -u_int +u_long uvm_physseg_get_start_hint(uvm_physseg_t upm) { KASSERT(uvm_physseg_valid_p(upm)); @@ -1116,7 +1116,7 @@ uvm_physseg_get_start_hint(uvm_physseg_t } bool -uvm_physseg_set_start_hint(uvm_physseg_t upm, u_int start_hint) +uvm_physseg_set_start_hint(uvm_physseg_t upm, u_long start_hint) { if 
(uvm_physseg_valid_p(upm) == false) return false; Index: sys/uvm/uvm_physseg.h =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_physseg.h,v retrieving revision 1.8 diff -p -u -r1.8 uvm_physseg.h --- sys/uvm/uvm_physseg.h 2 Jan 2017 20:08:32 -0000 1.8 +++ sys/uvm/uvm_physseg.h 27 Dec 2023 22:06:10 -0000 @@ -106,8 +106,8 @@ struct pmap_physseg * uvm_physseg_get_pm #endif int uvm_physseg_get_free_list(uvm_physseg_t); -u_int uvm_physseg_get_start_hint(uvm_physseg_t); -bool uvm_physseg_set_start_hint(uvm_physseg_t, u_int); +u_long uvm_physseg_get_start_hint(uvm_physseg_t); +bool uvm_physseg_set_start_hint(uvm_physseg_t, u_long); /* * Functions to help walk the list of segments.
This patch works around rendering corruption on aarch64. Index: external/mit/MesaLib/dist/src/gallium/drivers/radeonsi/si_buffer.c =================================================================== RCS file: /cvsroot/xsrc/external/mit/MesaLib/dist/src/gallium/drivers/radeonsi/si_buffer.c,v retrieving revision 1.1.1.3 diff -p -u -r1.1.1.3 si_buffer.c --- external/mit/MesaLib/dist/src/gallium/drivers/radeonsi/si_buffer.c 9 May 2022 01:23:33 -0000 1.1.1.3 +++ external/mit/MesaLib/dist/src/gallium/drivers/radeonsi/si_buffer.c 27 Dec 2023 23:08:23 -0000 @@ -100,6 +100,10 @@ void si_init_resource_fields(struct si_s */ if (!sscreen->info.kernel_flushes_hdp_before_ib || !sscreen->info.is_amdgpu) res->domains = RADEON_DOMAIN_GTT; +#if defined(PIPE_ARCH_AARCH64) + /* https://gitlab.freedesktop.org/mesa/mesa/-/issues/3274 */ + res->domains = RADEON_DOMAIN_GTT; +#endif } /* Tiled textures are unmappable. Always put them in VRAM. */