Hi,

I've spent some time testing GPUs on aarch64 and fixing bugs.
Attached are some patches to make things more usable.

I would like to commit these, or get feedback on whether the changes
should be implemented differently. Since I touched things in both MD code
and UVM, review would be appreciated. The first line of each patch
contains a summary of what it does.

amdgpu (bonaire C1K) on AADK: works[1][2]
  glamor, picom, alacritty, doomlegacy OK
radeon (bonaire) on AADK: works[1], ditto
radeon (cedar) on LX2K: works, minor rendering glitches

[1] AADK firmware doesn't POST the GPU and VGA BIOS extraction fails.
    You must either provide a VGA BIOS image dump manually, build a custom
    EDK2 with VGA POSTing capability, or figure out how to map the ROM.
[2] /libdata/firmware/amdgpu is not shipped.
    (Can we include these in the sets?)

Kind regards,
-Tobias
This patch set adds a new UVM freelist on aarch64 to manage pages
allocated to GPU buffers, whose physical addresses must fit in 40 bits
(paddr < 2^40, i.e. the first 1 TB).

Index: sys/arch/aarch64/include/vmparam.h
===================================================================
RCS file: /cvsroot/src/sys/arch/aarch64/include/vmparam.h,v
retrieving revision 1.20
diff -p -u -r1.20 vmparam.h
--- sys/arch/aarch64/include/vmparam.h  16 Apr 2023 14:01:51 -0000      1.20
+++ sys/arch/aarch64/include/vmparam.h  27 Dec 2023 22:07:38 -0000
@@ -182,8 +182,9 @@
 #define VM_PHYSSEG_MAX         64              /* XXX */
 #define VM_PHYSSEG_STRAT       VM_PSTRAT_BSEARCH
 
-#define VM_NFREELIST           1
+#define VM_NFREELIST           2
 #define VM_FREELIST_DEFAULT    0
+#define VM_FREELIST_FIRST1T    1
 
 #elif defined(__arm__)
 
Index: sys/arch/evbarm/fdt/fdt_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/evbarm/fdt/fdt_machdep.c,v
retrieving revision 1.106
diff -p -u -r1.106 fdt_machdep.c
--- sys/arch/evbarm/fdt/fdt_machdep.c   4 Aug 2023 09:06:33 -0000       1.106
+++ sys/arch/evbarm/fdt/fdt_machdep.c   27 Dec 2023 22:07:38 -0000
@@ -194,6 +194,11 @@ fdt_add_boot_physmem(const struct fdt_me
        bp->bp_start = atop(saddr);
        bp->bp_pages = atop(eaddr) - bp->bp_start;
        bp->bp_freelist = VM_FREELIST_DEFAULT;
+#ifdef _LP64
+       if (eaddr < (1UL<<40)) {
+               bp->bp_freelist = VM_FREELIST_FIRST1T;
+       }
+#endif
 
 #ifdef PMAP_NEED_ALLOC_POOLPAGE
        const uint64_t memory_size = *(uint64_t *)arg;
Index: sys/external/bsd/drm2/include/drm/bus_dma_hacks.h
===================================================================
RCS file: /cvsroot/src/sys/external/bsd/drm2/include/drm/bus_dma_hacks.h,v
retrieving revision 1.25
diff -p -u -r1.25 bus_dma_hacks.h
--- sys/external/bsd/drm2/include/drm/bus_dma_hacks.h   19 Jul 2022 23:19:44 -0000      1.25
+++ sys/external/bsd/drm2/include/drm/bus_dma_hacks.h   27 Dec 2023 22:07:38 -0000
@@ -78,7 +78,7 @@ BUS_MEM_TO_PHYS(bus_dma_tag_t dmat, bus_
                if (dr->dr_busbase <= ba && ba - dr->dr_busbase <= dr->dr_len)
                        return ba - dr->dr_busbase + dr->dr_sysbase;
        }
-       panic("bus addr has no bus address in dma tag %p: %"PRIxPADDR, dmat,
+       panic("bus addr has no paddr in dma tag %p: %"PRIxPADDR, dmat,
            ba);
 }
 #elif defined(__sparc__) || defined(__sparc64__)
@@ -99,6 +99,8 @@ bus_dmamem_pgfl(bus_dma_tag_t tag)
 {
 #if defined(__i386__) || defined(__x86_64__)
        return x86_select_freelist(tag->_bounce_alloc_hi - 1);
+#elif defined(__aarch64__)
+       return VM_FREELIST_FIRST1T;
 #else
        return VM_FREELIST_DEFAULT;
 #endif
This patch set changes the type of uvm_physseg.start_hint from u_int to u_long, and widens the matching page-index locals in uvm_pglist.c from int to long.

Index: sys/uvm/uvm_pglist.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pglist.c,v
retrieving revision 1.90
diff -p -u -r1.90 uvm_pglist.c
--- sys/uvm/uvm_pglist.c        21 Dec 2021 08:27:49 -0000      1.90
+++ sys/uvm/uvm_pglist.c        27 Dec 2023 22:06:10 -0000
@@ -112,8 +112,9 @@ static int
 uvm_pglistalloc_c_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
     paddr_t alignment, paddr_t boundary, struct pglist *rlist)
 {
-       signed int candidate, limit, candidateidx, end, idx, skip;
-       int pagemask;
+       long candidate, limit, candidateidx, end, idx;
+       int skip;
+       long pagemask;
        bool second_pass;
 #ifdef DEBUG
        paddr_t idxpa, lastidxpa;
@@ -138,9 +139,9 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, 
         * succeeded.
         */
        alignment = atop(alignment);
-       candidate = roundup2(uimax(low, uvm_physseg_get_avail_start(psi) +
+       candidate = roundup2(ulmax(low, uvm_physseg_get_avail_start(psi) +
                uvm_physseg_get_start_hint(psi)), alignment);
-       limit = uimin(high, uvm_physseg_get_avail_end(psi));
+       limit = ulmin(high, uvm_physseg_get_avail_end(psi));
        pagemask = ~((boundary >> PAGE_SHIFT) - 1);
        skip = 0;
        second_pass = false;
@@ -162,8 +163,8 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, 
                         * is were we started.
                         */
                        second_pass = true;
-                       candidate = roundup2(uimax(low, 
uvm_physseg_get_avail_start(psi)), alignment);
-                       limit = uimin(limit, uvm_physseg_get_avail_start(psi) +
+                       candidate = roundup2(ulmax(low, 
uvm_physseg_get_avail_start(psi)), alignment);
+                       limit = ulmin(limit, uvm_physseg_get_avail_start(psi) +
                            uvm_physseg_get_start_hint(psi));
                        skip = 0;
                        continue;
@@ -200,7 +201,7 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, 
                 * Found a suitable starting page.  See if the range is free.
                 */
 #ifdef PGALLOC_VERBOSE
-               printf("%s: psi=%d candidate=%#x end=%#x skip=%#x, 
align=%#"PRIxPADDR,
+               printf("%s: psi=%d candidate=%#lx end=%#lx skip=%#x, 
align=%#"PRIxPADDR,
                    __func__, psi, candidateidx, end, skip, alignment);
 #endif
                /*
@@ -283,7 +284,7 @@ uvm_pglistalloc_c_ps(uvm_physseg_t psi, 
            uvm_physseg_get_avail_start(psi));
        KASSERTMSG(uvm_physseg_get_start_hint(psi) <=
            uvm_physseg_get_avail_end(psi) - uvm_physseg_get_avail_start(psi),
-           "%x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
+           "%lx %lu (%#lx) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
            candidate + num,
            uvm_physseg_get_start_hint(psi), uvm_physseg_get_start_hint(psi),
            uvm_physseg_get_avail_end(psi), uvm_physseg_get_avail_start(psi),
@@ -523,7 +524,8 @@ static int
 uvm_pglistalloc_s_ps(uvm_physseg_t psi, int num, paddr_t low, paddr_t high,
     struct pglist *rlist)
 {
-       int todo, limit, candidate;
+       int todo;
+       long limit, candidate;
        struct vm_page *pg;
        bool second_pass;
 #ifdef PGALLOC_VERBOSE
@@ -546,9 +548,9 @@ uvm_pglistalloc_s_ps(uvm_physseg_t psi, 
                return -1;
 
        todo = num;
-       candidate = uimax(low, uvm_physseg_get_avail_start(psi) +
+       candidate = ulmax(low, uvm_physseg_get_avail_start(psi) +
            uvm_physseg_get_start_hint(psi));
-       limit = uimin(high, uvm_physseg_get_avail_end(psi));
+       limit = ulmin(high, uvm_physseg_get_avail_end(psi));
        pg = uvm_physseg_get_pg(psi, candidate - uvm_physseg_get_start(psi));
        second_pass = false;
 
@@ -560,8 +562,8 @@ again:
                                break;
                        }
                        second_pass = true;
-                       candidate = uimax(low, 
uvm_physseg_get_avail_start(psi));
-                       limit = uimin(limit, uvm_physseg_get_avail_start(psi) +
+                       candidate = ulmax(low, 
uvm_physseg_get_avail_start(psi));
+                       limit = ulmin(limit, uvm_physseg_get_avail_start(psi) +
                            uvm_physseg_get_start_hint(psi));
                        pg = uvm_physseg_get_pg(psi, candidate - 
uvm_physseg_get_start(psi));
                        goto again;
@@ -571,10 +573,10 @@ again:
                        paddr_t cidx = 0;
                        const uvm_physseg_t bank = uvm_physseg_find(candidate, 
&cidx);
                        KDASSERTMSG(bank == psi,
-                           "uvm_physseg_find(%#x) (%"PRIxPHYSSEG ") != psi 
%"PRIxPHYSSEG,
+                           "uvm_physseg_find(%#lx) (%"PRIxPHYSSEG ") != psi 
%"PRIxPHYSSEG,
                             candidate, bank, psi);
                        KDASSERTMSG(cidx == candidate - 
uvm_physseg_get_start(psi),
-                           "uvm_physseg_find(%#x): %#"PRIxPADDR" != off 
%"PRIxPADDR,
+                           "uvm_physseg_find(%#lx): %#"PRIxPADDR" != off 
%"PRIxPADDR,
                             candidate, cidx, candidate - 
uvm_physseg_get_start(psi));
                }
 #endif
@@ -594,7 +596,7 @@ again:
        uvm_physseg_set_start_hint(psi, candidate + 1 - 
uvm_physseg_get_avail_start(psi));
        KASSERTMSG(uvm_physseg_get_start_hint(psi) <= 
uvm_physseg_get_avail_end(psi) -
            uvm_physseg_get_avail_start(psi),
-           "%#x %u (%#x) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
+           "%#lx %lu (%#lx) <= %#"PRIxPADDR" - %#"PRIxPADDR" (%#"PRIxPADDR")",
            candidate + 1,
            uvm_physseg_get_start_hint(psi),
            uvm_physseg_get_start_hint(psi),
Index: sys/uvm/uvm_physseg.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_physseg.c,v
retrieving revision 1.19
diff -p -u -r1.19 uvm_physseg.c
--- sys/uvm/uvm_physseg.c       23 Sep 2023 18:20:20 -0000      1.19
+++ sys/uvm/uvm_physseg.c       27 Dec 2023 22:06:10 -0000
@@ -100,7 +100,7 @@ struct uvm_physseg {
        paddr_t avail_end;              /* (PF# of last free page in segment) 
+1  */
        struct  extent *ext;            /* extent(9) structure to manage pgs[] 
*/
        int     free_list;              /* which free list they belong on */
-       u_int   start_hint;             /* start looking for free pages here */
+       u_long  start_hint;             /* start looking for free pages here */
 #ifdef __HAVE_PMAP_PHYSSEG
        struct  pmap_physseg pmseg;     /* pmap specific (MD) data */
 #endif
@@ -1108,7 +1108,7 @@ uvm_physseg_get_free_list(uvm_physseg_t 
        return HANDLE_TO_PHYSSEG_NODE(upm)->free_list;
 }
 
-u_int
+u_long
 uvm_physseg_get_start_hint(uvm_physseg_t upm)
 {
        KASSERT(uvm_physseg_valid_p(upm));
@@ -1116,7 +1116,7 @@ uvm_physseg_get_start_hint(uvm_physseg_t
 }
 
 bool
-uvm_physseg_set_start_hint(uvm_physseg_t upm, u_int start_hint)
+uvm_physseg_set_start_hint(uvm_physseg_t upm, u_long start_hint)
 {
        if (uvm_physseg_valid_p(upm) == false)
                return false;
Index: sys/uvm/uvm_physseg.h
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_physseg.h,v
retrieving revision 1.8
diff -p -u -r1.8 uvm_physseg.h
--- sys/uvm/uvm_physseg.h       2 Jan 2017 20:08:32 -0000       1.8
+++ sys/uvm/uvm_physseg.h       27 Dec 2023 22:06:10 -0000
@@ -106,8 +106,8 @@ struct      pmap_physseg * uvm_physseg_get_pm
 #endif
 
 int uvm_physseg_get_free_list(uvm_physseg_t);
-u_int uvm_physseg_get_start_hint(uvm_physseg_t);
-bool uvm_physseg_set_start_hint(uvm_physseg_t, u_int);
+u_long uvm_physseg_get_start_hint(uvm_physseg_t);
+bool uvm_physseg_set_start_hint(uvm_physseg_t, u_long);
 
 /*
  * Functions to help walk the list of segments.
This patch works around rendering corruption on aarch64.

Index: external/mit/MesaLib/dist/src/gallium/drivers/radeonsi/si_buffer.c
===================================================================
RCS file: /cvsroot/xsrc/external/mit/MesaLib/dist/src/gallium/drivers/radeonsi/si_buffer.c,v
retrieving revision 1.1.1.3
diff -p -u -r1.1.1.3 si_buffer.c
--- external/mit/MesaLib/dist/src/gallium/drivers/radeonsi/si_buffer.c  9 May 2022 01:23:33 -0000       1.1.1.3
+++ external/mit/MesaLib/dist/src/gallium/drivers/radeonsi/si_buffer.c  27 Dec 2023 23:08:23 -0000
@@ -100,6 +100,10 @@ void si_init_resource_fields(struct si_s
        */
       if (!sscreen->info.kernel_flushes_hdp_before_ib || 
!sscreen->info.is_amdgpu)
          res->domains = RADEON_DOMAIN_GTT;
+#if defined(PIPE_ARCH_AARCH64)
+      /* https://gitlab.freedesktop.org/mesa/mesa/-/issues/3274 */
+      res->domains = RADEON_DOMAIN_GTT;
+#endif
    }
 
    /* Tiled textures are unmappable. Always put them in VRAM. */

Reply via email to