The branch main has been updated by adrian:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=aa6b871ea77e5b52cf4683c5f304a82d2e351ba0

commit aa6b871ea77e5b52cf4683c5f304a82d2e351ba0
Author:     Marco Devesas Campos <[email protected]>
AuthorDate: 2025-10-20 02:50:19 +0000
Commit:     Adrian Chadd <[email protected]>
CommitDate: 2025-11-15 03:27:46 +0000

    arm64: Add support to vchiq and bcm2835_audio (plus some fixes)
    
    Add 64 bit support to vchiq:
    
     * update fields to the appropriate fixed bit-size variants everywhere
       (cf. e.g. [ref:sizes] and [ref:sizes2])
     * refer to event semaphores (which are passed to the strictly 32-bit VC)
       by offset instead of by pointer [ref:sems]
     * dsb() is dsb(sy) in arm64 (vchiq_{core.c,core.h,kmod.c}) [ref:dsb]
     * comment out some unneeded code in parse_rx_slots around 
VCHIQ_MSG_BULK_RX (cf. [ref:deadcode])
     * adapt remote_event_signal to arm64 caching behaviours (vchiq_kmod.c)
     * refactor synchronization around remote_event_signal, forcing a wmb to
       be on the safe side, thereby making it look more like what Linux does
       [ref:sync] (vchiq_{core,kmod}.c), and making a comment in vchiq_core.c
       true (it wasn't before)
     * add a few more syncs to be on the safe side (vchiq_2835_arm.c)
     * use arm64 dcache invalidation mechanisms (vchiq_2835_arm.c)
     * explicitly invalidate pages on arm64 post bulk-read (vchiq_2835_arm.c)
     * support bulk transfers on rpi-4 (aka "long address space" transfers), by 
hard-coding their vc offset (0) and different bit-shift [ref:longbulk] 
(vchiq_2835_arm.c)
     * refactor a loop-of-constant-test (vchiq_2835_arm.c)
     * use the correct (hard-coded) cache-line size on arm64
     * rework the handling of chipset "features" to account for the extra 
behaviours with 64 bit chipsets. (vchiq_kmod.c)
     * add sysctl-s (log, arm_log) to control debug (vchiq_kmod.c)
     * add example kernel config (GENERIC-VCHIQ)
    
    Fixes:
    
     * Rework error handling in create_pagelist, avoiding a potential panic when
       freeing memory that had been allocated with bus_dmamem_alloc, a potential
       null dereference, and a leak when pinning pages fails (vchiq_2835_arm.c)
     * fix a confusion about the behaviour of cv_wait_sig that led to
       uninterruptible looping (vchi_bsd.c)
     * implement detection of fatal signals (vchi_bsd.c)
     * fix a confusion with the name of a variable introduced by #a0b8746
       that could lead to a panic when closing the cdev file (vchiq_arm.c)
     * release user connection when destructing cdevpriv and avoid user
       processes sharing connection data, which led to stalls and data
       corruption (vchiq_arm.c)
    
    Update bcm2835_audio to work on 64bit systems:
    
     * update VC audio fields (vc_vchi_audioserv_defs.h, bcm2835_audio.c)
     * repurpose the hitherto unused callback field to help push a 64 bit 
pointer in (bcm2835_audio.c)
     * increase (hopefully) the robustness of the code that shifts data to VC 
(bcm2835_audio.c)
     * add a sysctl to control the amount of debugging info output by 
bcm2835_audio.c
    
    Tested on zero, zero2 and 4+ with ping, functional, bulk and control 
vchiq_test-s, and omxplayer
    
      [ref:dsb]: 
https://github.com/raspberrypi/linux/commit/35b7ebda57affcfd3616d39d5a727a4495b31123
      [ref:sems]: 
https://github.com/raspberrypi/linux/commit/24a4262afb10907fce3cdbc3ae336fcf4cdaece5
      [ref:sizes]: 
https://github.com/raspberrypi/linux/commit/e64568b8ea6c04e747e432c17ce2452652075216
      [ref:sizes2]: 
https://github.com/raspberrypi/linux/commit/f9bee6dd24addfa00c2c8d50c25b73efbfbb28ba
      [ref:deadcode]: 
https://github.com/raspberrypi/linux/commit/14f4d72fb799a9b3170a45ab80d4a3ddad541960
      [ref:sync]: 
https://github.com/raspberrypi/linux/commit/51c071265079319583e4c6e8c61e09660300d0bf
      [ref:longbulk]: 
https://github.com/raspberrypi/linux/commit/37f6f19a83722c9b866cecb5e455b2e16e5bbc6b
    
    Differential Revision:  https://reviews.freebsd.org/D37878
    Submitted by: Marco Devesas Campos <[email protected]>
---
 sys/arm/broadcom/bcm2835/bcm2835_audio.c           | 152 +++++++++++--
 sys/arm/broadcom/bcm2835/vc_vchi_audioserv_defs.h  |   8 +-
 sys/arm64/conf/std.broadcom                        |   3 +
 sys/contrib/vchiq/interface/compat/vchi_bsd.c      |  12 +-
 .../vchiq/interface/vchiq_arm/vchiq_2835_arm.c     | 145 +++++++++++--
 sys/contrib/vchiq/interface/vchiq_arm/vchiq_arm.c  | 235 +++++++++++----------
 sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.c |  91 ++++----
 sys/contrib/vchiq/interface/vchiq_arm/vchiq_core.h |  11 +-
 sys/contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c |  76 ++++++-
 .../vchiq/interface/vchiq_arm/vchiq_pagelist.h     |   8 +-
 sys/contrib/vchiq/interface/vchiq_arm/vchiq_shim.c |   4 +-
 11 files changed, 531 insertions(+), 214 deletions(-)

diff --git a/sys/arm/broadcom/bcm2835/bcm2835_audio.c 
b/sys/arm/broadcom/bcm2835/bcm2835_audio.c
index 06bbc67bd7bd..1406fcc3d952 100644
--- a/sys/arm/broadcom/bcm2835/bcm2835_audio.c
+++ b/sys/arm/broadcom/bcm2835/bcm2835_audio.c
@@ -113,6 +113,12 @@ struct bcm2835_audio_chinfo {
        uint64_t retrieved_samples;
        uint64_t underruns;
        int starved;
+       struct bcm_log_vars {
+               unsigned int bsize ;
+               int slept_for_lack_of_space ;
+       } log_vars;
+#define DEFAULT_LOG_VALUES \
+       ((struct bcm_log_vars) { .bsize = 0 , .slept_for_lack_of_space = 0 })
 };
 
 struct bcm2835_audio_info {
@@ -164,6 +170,10 @@ struct bcm2835_audio_info {
                        device_printf((sc)->dev, __VA_ARGS__);  \
        } while(0)
 
+/* Useful for circular buffer calcs */
+#define MOD_DIFF(front,rear,mod) (((mod) + (front) - (rear)) % (mod))
+
+
 static const char *
 dest_description(uint32_t dest)
 {
@@ -237,10 +247,21 @@ bcm2835_audio_callback(void *param, const 
VCHI_CALLBACK_REASON_T reason, void *m
                            m.type);
                }
        } else if (m.type == VC_AUDIO_MSG_TYPE_COMPLETE) {
-               struct bcm2835_audio_chinfo *ch = m.u.complete.cookie;
+               unsigned int signaled = 0;
+               struct bcm2835_audio_chinfo *ch ;
+#if defined(__aarch64__)
+               ch = (void *) ((((size_t)m.u.complete.callback) << 32)
+                   | ((size_t)m.u.complete.cookie));
+#else
+               ch = (void *) (m.u.complete.cookie);
+#endif
 
                int count = m.u.complete.count & 0xffff;
                int perr = (m.u.complete.count & (1U << 30)) != 0;
+
+               BCM2835_LOG_TRACE(sc, "in:: count:0x%x perr:%d\n",
+                   m.u.complete.count, perr);
+
                ch->callbacks++;
                if (perr)
                        ch->underruns++;
@@ -264,13 +285,31 @@ bcm2835_audio_callback(void *param, const 
VCHI_CALLBACK_REASON_T reason, void *m
                                            (uintmax_t)ch->retrieved_samples,
                                            (uintmax_t)ch->submitted_samples);
                                }
-                               ch->available_space += count;
-                               ch->retrieved_samples += count;
                        }
-                       if (perr || (ch->available_space >= 
VCHIQ_AUDIO_PACKET_SIZE))
-                               cv_signal(&sc->worker_cv);
+                       ch->available_space += count;
+                       ch->retrieved_samples += count;
+                       /*
+                        *  XXXMDC
+                        *  Experimental: if VC says it's empty, believe it
+                        *  Has to come after the usual adjustments
+                        */
+                       if(perr){
+                               ch->available_space = VCHIQ_AUDIO_BUFFER_SIZE;
+                               perr = ch->retrieved_samples; // shd be != 0
+                       }
+
+                       if ((ch->available_space >= 1*VCHIQ_AUDIO_PACKET_SIZE)){
+                                       cv_signal(&sc->worker_cv);
+                               signaled = 1;
+                       }
                }
                BCM2835_AUDIO_UNLOCK(sc);
+               if(perr){
+                       BCM2835_LOG_WARN(sc,
+                           "VC starved; reported %u for a total of %u\n"
+                           "worker %s\n", count, perr,
+                           (signaled ? "signaled": "not signaled"));
+               }
        } else
                BCM2835_LOG_WARN(sc, "%s: unknown m.type: %d\n", __func__,
                    m.type);
@@ -371,6 +410,7 @@ bcm2835_audio_stop(struct bcm2835_audio_chinfo *ch)
                m.type = VC_AUDIO_MSG_TYPE_STOP;
                m.u.stop.draining = 0;
 
+               BCM2835_LOG_INFO(sc,"sending stop\n");
                ret = vchi_msg_queue(sc->vchi_handle,
                    &m, sizeof m, VCHI_FLAGS_BLOCK_UNTIL_QUEUED, NULL);
 
@@ -449,18 +489,25 @@ static bool
 bcm2835_audio_buffer_should_sleep(struct bcm2835_audio_chinfo *ch)
 {
 
+       ch->log_vars.slept_for_lack_of_space = 0;
        if (ch->playback_state != PLAYBACK_PLAYING)
                return (true);
 
        /* Not enough data */
-       if (sndbuf_getready(ch->buffer) < VCHIQ_AUDIO_PACKET_SIZE) {
-               printf("starve\n");
+       /* XXXMDC Take unsubmitted stuff into account */
+       if (sndbuf_getready(ch->buffer)
+                       - MOD_DIFF(
+                               ch->unsubmittedptr,
+                               sndbuf_getreadyptr(ch->buffer),
+                               ch->buffer->bufsize
+                       ) < VCHIQ_AUDIO_PACKET_SIZE) {
                ch->starved++;
                return (true);
        }
 
        /* Not enough free space */
        if (ch->available_space < VCHIQ_AUDIO_PACKET_SIZE) {
+               ch->log_vars.slept_for_lack_of_space = 1;
                return (true);
        }
 
@@ -481,8 +528,13 @@ bcm2835_audio_write_samples(struct bcm2835_audio_chinfo 
*ch, void *buf, uint32_t
        m.type = VC_AUDIO_MSG_TYPE_WRITE;
        m.u.write.count = count;
        m.u.write.max_packet = VCHIQ_AUDIO_PACKET_SIZE;
-       m.u.write.callback = NULL;
-       m.u.write.cookie = ch;
+#if defined(__aarch64__)
+       m.u.write.callback = (uint32_t)(((size_t) ch) >> 32) & 0xffffffff;
+       m.u.write.cookie = (uint32_t)(((size_t) ch) & 0xffffffff);
+#else
+       m.u.write.callback = (uint32_t) NULL;
+       m.u.write.cookie = (uint32_t) ch;
+#endif
        m.u.write.silence = 0;
 
        ret = vchi_msg_queue(sc->vchi_handle,
@@ -529,6 +581,11 @@ bcm2835_audio_worker(void *data)
                while ((sc->flags_pending == 0) &&
                    bcm2835_audio_buffer_should_sleep(ch)) {
                        cv_wait_sig(&sc->worker_cv, &sc->lock);
+                       if ((sc->flags_pending == 0) &&
+                           (ch->log_vars.slept_for_lack_of_space)) {
+                               BCM2835_LOG_TRACE(sc,
+                                   "slept for lack of space\n");
+                       }
                }
                flags = sc->flags_pending;
                /* Clear pending flags */
@@ -555,16 +612,25 @@ bcm2835_audio_worker(void *data)
                        BCM2835_AUDIO_LOCK(sc);
                        bcm2835_audio_reset_channel(&sc->pch);
                        ch->playback_state = PLAYBACK_IDLE;
+                       long sub_total = ch->submitted_samples;
+                       long retd = ch->retrieved_samples;
                        BCM2835_AUDIO_UNLOCK(sc);
+                       BCM2835_LOG_INFO(sc,
+                           "stopped audio. submitted a total of %lu "
+                           "having been acked %lu\n", sub_total, retd);
                        continue;
                }
 
                /* Requested to start playback */
                if ((flags & AUDIO_PLAY) &&
                    (ch->playback_state == PLAYBACK_IDLE)) {
+                       BCM2835_LOG_INFO(sc, "starting audio\n");
+                       unsigned int bsize = ch->buffer->bufsize;
                        BCM2835_AUDIO_LOCK(sc);
                        ch->playback_state = PLAYBACK_PLAYING;
+                       ch->log_vars.bsize = bsize;
                        BCM2835_AUDIO_UNLOCK(sc);
+                       BCM2835_LOG_INFO(sc, "buffer size is %u\n", bsize);
                        bcm2835_audio_start(ch);
                }
 
@@ -574,19 +640,65 @@ bcm2835_audio_worker(void *data)
                if (sndbuf_getready(ch->buffer) == 0)
                        continue;
 
-               count = sndbuf_getready(ch->buffer);
+               uint32_t i_count;
+
+               /* XXXMDC Take unsubmitted stuff into account */
+               count = i_count = sndbuf_getready(ch->buffer)
+                   - MOD_DIFF(ch->unsubmittedptr,
+                    sndbuf_getreadyptr(ch->buffer),
+                    ch->buffer->bufsize);
                size = ch->buffer->bufsize;
-               readyptr = sndbuf_getreadyptr(ch->buffer);
+               readyptr = ch->unsubmittedptr;
+
+               int size_changed = 0;
+               unsigned int available;
 
                BCM2835_AUDIO_LOCK(sc);
-               if (readyptr + count > size)
+               if (size != ch->log_vars.bsize) {
+                       ch->log_vars.bsize = size;
+                       size_changed = 1;
+               }
+               available = ch->available_space;
+               /*
+                *  XXXMDC
+                *
+                *  On arm64, got into situations where
+                *  readyptr was less than a packet away
+                *  from the end of the buffer, which led
+                *  to count being set to 0 and, inexorably, starvation.
+                *  Code below tries to take that into account.
+                *  The problem might have been fixed with some of the
+                *  other changes that were made in the meantime,
+                *  but for now this works fine.
+                */
+               if (readyptr + count > size) {
                        count = size - readyptr;
-               count = min(count, ch->available_space);
-               count -= (count % VCHIQ_AUDIO_PACKET_SIZE);
+               }
+               if(count > ch->available_space){
+                       count = ch->available_space;
+                       count -= (count % VCHIQ_AUDIO_PACKET_SIZE);
+               }else if (count > VCHIQ_AUDIO_PACKET_SIZE){
+                       count -= (count % VCHIQ_AUDIO_PACKET_SIZE);
+               }else if (size > count + readyptr) {
+                       count = 0;
+               }
                BCM2835_AUDIO_UNLOCK(sc);
 
-               if (count < VCHIQ_AUDIO_PACKET_SIZE)
+               if (count % VCHIQ_AUDIO_PACKET_SIZE != 0) {
+                       BCM2835_LOG_WARN(sc, "count: %u  initial count: %u  "
+                           "size: %u  readyptr: %u  available: %u\n", count,
+                           i_count,size,readyptr,available);
+               }
+               if (size_changed)
+                   BCM2835_LOG_INFO(sc, "bsize changed to %u\n", size);
+
+               if (count == 0) {
+                       BCM2835_LOG_WARN(sc,
+                           "not enough room for a packet: count %d,"
+                           " i_count %d, rptr %d, size %d\n",
+                           count, i_count, readyptr, size);
                        continue;
+               }
 
                buf = ch->buffer->buf + readyptr;
 
@@ -596,8 +708,16 @@ bcm2835_audio_worker(void *data)
                    ch->buffer->bufsize;
                ch->available_space -= count;
                ch->submitted_samples += count;
+               long sub = count;
+               long sub_total = ch->submitted_samples;
+               long retd = ch->retrieved_samples;
                KASSERT(ch->available_space >= 0, ("ch->available_space == 
%d\n", ch->available_space));
                BCM2835_AUDIO_UNLOCK(sc);
+
+               BCM2835_LOG_TRACE(sc,
+                   "submitted %lu for a total of %lu having been acked %lu; "
+                   "rptr %d, had %u available\n", sub, sub_total, retd,
+                   readyptr, available);
        }
 
        BCM2835_AUDIO_LOCK(sc);
@@ -650,6 +770,8 @@ bcmchan_init(kobj_t obj, void *devinfo, struct snd_dbuf *b, 
struct pcm_channel *
                return NULL;
        }
 
+       ch->log_vars = DEFAULT_LOG_VALUES;
+
        BCM2835_AUDIO_LOCK(sc);
        bcm2835_worker_update_params(sc);
        BCM2835_AUDIO_UNLOCK(sc);
diff --git a/sys/arm/broadcom/bcm2835/vc_vchi_audioserv_defs.h 
b/sys/arm/broadcom/bcm2835/vc_vchi_audioserv_defs.h
index 896e706ff492..ea972ff2d001 100644
--- a/sys/arm/broadcom/bcm2835/vc_vchi_audioserv_defs.h
+++ b/sys/arm/broadcom/bcm2835/vc_vchi_audioserv_defs.h
@@ -112,8 +112,8 @@ typedef struct
 typedef struct
 {
        uint32_t count; /* in bytes */
-       void *callback;
-       void *cookie;
+       uint32_t callback;
+       uint32_t cookie;
        uint16_t silence;
        uint16_t max_packet;
 } VC_AUDIO_WRITE_T;
@@ -129,8 +129,8 @@ typedef struct
 typedef struct
 {
        int32_t count;  /* Success value */
-       void *callback;
-       void *cookie;
+       uint32_t callback;
+       uint32_t cookie;
 } VC_AUDIO_COMPLETE_T;
 
 /* Message header for all messages in HOST->VC direction */
diff --git a/sys/arm64/conf/std.broadcom b/sys/arm64/conf/std.broadcom
index 3332aaac0826..65bee16e315d 100644
--- a/sys/arm64/conf/std.broadcom
+++ b/sys/arm64/conf/std.broadcom
@@ -33,5 +33,8 @@ device                sdhci
 options        FDT
 device         acpi
 
+# Sound support
+device         vchiq
+
 # DTBs
 makeoptions    MODULES_EXTRA+="dtb/rpi"
diff --git a/sys/contrib/vchiq/interface/compat/vchi_bsd.c 
b/sys/contrib/vchiq/interface/compat/vchi_bsd.c
index 8f47b3dc02d6..08f2f66dfc54 100644
--- a/sys/contrib/vchiq/interface/compat/vchi_bsd.c
+++ b/sys/contrib/vchiq/interface/compat/vchi_bsd.c
@@ -340,7 +340,6 @@ down_interruptible(struct semaphore *s)
        int ret ;
 
        ret = 0;
-
        mtx_lock(&s->mtx);
 
        while (s->value == 0) {
@@ -348,13 +347,11 @@ down_interruptible(struct semaphore *s)
                ret = cv_wait_sig(&s->cv, &s->mtx);
                s->waiters--;
 
-               if (ret == EINTR) {
+               /* XXXMDC As per its semaphore.c, linux can only return EINTR */
+               if (ret) {
                        mtx_unlock(&s->mtx);
-                       return (-EINTR);
+                       return -EINTR;
                }
-
-               if (ret == ERESTART)
-                       continue;
        }
 
        s->value--;
@@ -441,8 +438,7 @@ flush_signals(VCHIQ_THREAD_T thr)
 int
 fatal_signal_pending(VCHIQ_THREAD_T thr)
 {
-       printf("Implement ME: %s\n", __func__);
-       return (0);
+       return (curproc_sigkilled());
 }
 
 /*
diff --git a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c 
b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
index 185e81e71bdc..7e105a6b3b77 100644
--- a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
+++ b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c
@@ -65,9 +65,24 @@ MALLOC_DEFINE(M_VCPAGELIST, "vcpagelist", "VideoCore 
pagelist memory");
 
 #define MAX_FRAGMENTS (VCHIQ_NUM_CURRENT_BULKS * 2)
 
+/*
+ *  XXXMDC
+ * Do this less ad-hoc-y -- e.g.
+ * 
https://github.com/raspberrypi/linux/commit/c683db8860a80562a2bb5b451d77b3e471d24f36
+ */
+#if defined(__aarch64__)
+int g_cache_line_size = 64;
+#else
 int g_cache_line_size = 32;
+#endif
 static int g_fragment_size;
 
+unsigned int g_long_bulk_space = 0;
+#define VM_PAGE_TO_VC_BULK_PAGE(x) (\
+       g_long_bulk_space ? VM_PAGE_TO_PHYS(x)\
+                : PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(x))\
+)
+
 typedef struct vchiq_2835_state_struct {
    int inited;
    VCHIQ_ARM_STATE_T arm_state;
@@ -113,6 +128,54 @@ vchiq_dmamap_cb(void *arg, bus_dma_segment_t *segs, int 
nseg, int err)
        *addr = PHYS_TO_VCBUS(segs[0].ds_addr);
 }
 
+#if defined(__aarch64__) /* See comment in free_pagelist */
+static int
+invalidate_cachelines_in_range_of_ppage(
+       vm_page_t p,
+       size_t offset,
+       size_t count
+)
+{
+       if(offset + count > PAGE_SIZE){ return EINVAL; }
+        uint8_t *dst = (uint8_t*)pmap_quick_enter_page(p);
+        if (!dst){
+                return ENOMEM;
+       }
+       cpu_dcache_inv_range((void *)((vm_offset_t)dst + offset), count);
+       pmap_quick_remove_page((vm_offset_t)dst);
+       return 0;
+}
+
+/* XXXMDC bulk instead of loading and invalidating single pages? */
+static void
+invalidate_cachelines_in_range_of_ppage_seq(vm_page_t *p, size_t start,
+    size_t count)
+{
+       if (start >= PAGE_SIZE)
+               goto invalid_input;
+
+#define _NEXT_AT(x,_m) (((x)+((_m)-1)) & ~((_m)-1))   /* for power of two m */
+       size_t offset = _NEXT_AT(start,g_cache_line_size);
+#undef _NEXT_AT
+       count = (offset < start + count) ? count - (offset - start) : 0;
+       offset = offset & (PAGE_SIZE - 1);
+       for (size_t done = 0; count > done;
+           p++, done += PAGE_SIZE - offset, offset = 0) {
+               size_t in_page = PAGE_SIZE - offset;
+               size_t todo = (count-done > in_page) ? in_page : count-done;
+               int e = invalidate_cachelines_in_range_of_ppage(*p, offset, 
todo);
+               if (e != 0)
+                       goto problem_in_loop;
+       }
+       return;
+
+problem_in_loop:
+invalid_input:
+       WARN_ON(1);
+       return;
+}
+#endif
+
 static int
 copyout_page(vm_page_t p, size_t offset, void *kaddr, size_t size)
 {
@@ -171,7 +234,7 @@ vchiq_platform_init(VCHIQ_STATE_T *state)
                goto failed_load;
        }
 
-       WARN_ON(((int)g_slot_mem & (PAGE_SIZE - 1)) != 0);
+       WARN_ON(((size_t)g_slot_mem & (PAGE_SIZE - 1)) != 0);
 
        vchiq_slot_zero = vchiq_init_slots(g_slot_mem, g_slot_mem_size);
        if (!vchiq_slot_zero) {
@@ -391,13 +454,14 @@ pagelist_page_free(vm_page_t pp)
 ** from increased speed as a result.
 */
 
+
 static int
 create_pagelist(char __user *buf, size_t count, unsigned short type,
        struct proc *p, BULKINFO_T *bi)
 {
        PAGELIST_T *pagelist;
        vm_page_t* pages;
-       unsigned long *addrs;
+       uint32_t *addrs;
        unsigned int num_pages, i;
        vm_offset_t offset;
        int pagelist_size;
@@ -434,7 +498,7 @@ create_pagelist(char __user *buf, size_t count, unsigned 
short type,
 
        err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist,
            BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map);
-       if (err) {
+       if (err || !pagelist) {
                vchiq_log_error(vchiq_core_log_level, "Unable to allocate 
pagelist memory");
                err = -ENOMEM;
                goto failed_alloc;
@@ -447,6 +511,7 @@ create_pagelist(char __user *buf, size_t count, unsigned 
short type,
        if (err) {
                vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for 
pagelist memory");
                err = -ENOMEM;
+               bi->pagelist = pagelist;
                goto failed_load;
        }
 
@@ -463,8 +528,9 @@ create_pagelist(char __user *buf, size_t count, unsigned 
short type,
        if (actual_pages != num_pages) {
                if (actual_pages > 0)
                        vm_page_unhold_pages(pages, actual_pages);
-               free(pagelist, M_VCPAGELIST);
-               return (-ENOMEM);
+               err = -ENOMEM;
+               bi->pagelist = pagelist;
+               goto failed_hold;
        }
 
        pagelist->length = count;
@@ -473,27 +539,28 @@ create_pagelist(char __user *buf, size_t count, unsigned 
short type,
 
        /* Group the pages into runs of contiguous pages */
 
-       base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0]));
+       size_t run_ceil = g_long_bulk_space ? 0x100 : PAGE_SIZE;
+       unsigned int pg_addr_rshift = g_long_bulk_space ? 4 : 0;
+       base_addr = (void *) VM_PAGE_TO_VC_BULK_PAGE(pages[0]);
        next_addr = base_addr + PAGE_SIZE;
        addridx = 0;
        run = 0;
-
+#define _PG_BLOCK(base,run) \
+               ((((size_t) (base)) >> pg_addr_rshift) & ~(run_ceil-1)) + (run)
        for (i = 1; i < num_pages; i++) {
-               addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i]));
-               if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) {
+               addr = (void *)VM_PAGE_TO_VC_BULK_PAGE(pages[i]);
+               if ((addr == next_addr) && (run < run_ceil - 1)) {
                        next_addr += PAGE_SIZE;
                        run++;
                } else {
-                       addrs[addridx] = (unsigned long)base_addr + run;
-                       addridx++;
+                       addrs[addridx++] = (uint32_t) _PG_BLOCK(base_addr,run);
                        base_addr = addr;
                        next_addr = addr + PAGE_SIZE;
                        run = 0;
                }
        }
-
-       addrs[addridx] = (unsigned long)base_addr + run;
-       addridx++;
+       addrs[addridx++] = _PG_BLOCK(base_addr, run);
+#undef _PG_BLOCK
 
        /* Partial cache lines (fragments) require special measures */
        if ((type == PAGELIST_READ) &&
@@ -514,20 +581,35 @@ create_pagelist(char __user *buf, size_t count, unsigned 
short type,
                WARN_ON(fragments == NULL);
                g_free_fragments = *(char **) g_free_fragments;
                up(&g_free_fragments_mutex);
-               pagelist->type =
-                        PAGELIST_READ_WITH_FRAGMENTS + 
-                        (fragments - g_fragments_base)/g_fragment_size;
+               pagelist->type = PAGELIST_READ_WITH_FRAGMENTS
+                    + (fragments - g_fragments_base)/g_fragment_size;
+#if defined(__aarch64__)
+                bus_dmamap_sync(bcm_slots_dma_tag, bcm_slots_dma_map,
+                    BUS_DMASYNC_PREREAD);
+#endif
        }
 
+#if defined(__aarch64__)
+       if(type == PAGELIST_READ) {
+               cpu_dcache_wbinv_range(buf, count);
+       } else {
+               cpu_dcache_wb_range(buf, count);
+       }
+       dsb(sy);
+#else
        pa = pmap_extract(PCPU_GET(curpmap), (vm_offset_t)buf);
        dcache_wbinv_poc((vm_offset_t)buf, pa, count);
+#endif
 
-       bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map, 
BUS_DMASYNC_PREWRITE);
+       bus_dmamap_sync(bi->pagelist_dma_tag, bi->pagelist_dma_map,
+           BUS_DMASYNC_PREWRITE);
 
        bi->pagelist = pagelist;
 
        return 0;
 
+failed_hold:
+       bus_dmamap_unload(bi->pagelist_dma_tag,bi->pagelist_dma_map);
 failed_load:
        bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, 
bi->pagelist_dma_map);
 failed_alloc:
@@ -556,6 +638,24 @@ free_pagelist(BULKINFO_T *bi, int actual)
 
        pages = (vm_page_t*)(pagelist->addrs + num_pages);
 
+#if defined(__aarch64__)
+       /*
+         * On arm64, even if the user keeps their end of the bargain
+        * -- do NOT touch the buffers sent to VC -- but reads around the
+        * pagelist after the invalidation above, the arm might preemptively
+        * load (and validate) cache lines for areas inside the page list,
+        * so we must invalidate them again.
+        *
+        * The functional test does it and without this it doesn't pass.
+        *
+        * XXXMDC might it be enough to invalidate a couple of pages at
+        * the ends of the page list?
+        */
+       if(pagelist->type >= PAGELIST_READ && actual > 0)
+               invalidate_cachelines_in_range_of_ppage_seq(pages,
+                   pagelist->offset, actual);
+#endif
+
        /* Deal with any partial cache lines (fragments) */
        if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
                char *fragments = g_fragments_base +
@@ -592,13 +692,18 @@ free_pagelist(BULKINFO_T *bi, int actual)
                up(&g_free_fragments_sema);
        }
 
-       for (i = 0; i < num_pages; i++) {
-               if (pagelist->type != PAGELIST_WRITE) {
+       if (pagelist->type != PAGELIST_WRITE) {
+               for (i = 0; i < num_pages; i++) {
                        vm_page_dirty(pages[i]);
                        pagelist_page_free(pages[i]);
                }
        }
 
+#if defined(__aarch64__)
+       /* XXXMDC necessary? */
+       dsb(sy);
+#endif
+
        bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map);
        bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, 
bi->pagelist_dma_map);
        bus_dma_tag_destroy(bi->pagelist_dma_tag);
diff --git a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_arm.c 
b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_arm.c
index e25c4d738922..36f9d0e3410d 100644
--- a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_arm.c
+++ b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_arm.c
@@ -386,7 +386,7 @@ static void
 user_service_free(void *userdata)
 {
        USER_SERVICE_T *user_service = userdata;
-       
+
        _sema_destroy(&user_service->insert_event);
        _sema_destroy(&user_service->remove_event);
 
@@ -410,7 +410,7 @@ static void close_delivered(USER_SERVICE_T *user_service)
 
                /* Wake the user-thread blocked in close_ or remove_service */
                up(&user_service->close_event);
- 
+
                user_service->close_pending = 0;
        }
 }
@@ -749,6 +749,7 @@ vchiq_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int 
fflag,
                                (size_t)waiter, current->p_pid);
                        args.userdata = &waiter->bulk_waiter;
                }
+
                status = vchiq_bulk_transfer
                        (args.handle,
                         VCHI_MEM_HANDLE_INVALID,
@@ -1093,7 +1094,7 @@ vchiq_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, 
int fflag,
        } break;
 
        case VCHIQ_IOC_LIB_VERSION: {
-               unsigned int lib_version = (unsigned int)arg;
+               size_t lib_version = (size_t)arg;
 
                if (lib_version < VCHIQ_VERSION_MIN)
                        ret = -EINVAL;
@@ -1155,18 +1156,14 @@ vchiq_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, 
int fflag,
        return ret;
 }
 
-static void
-instance_dtr(void *data)
-{
 
-       kfree(data);
-}
 
 /****************************************************************************
 *
 *   vchiq_open
 *
 ***************************************************************************/
+static void instance_dtr(void *data);
 
 static int
 vchiq_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
@@ -1206,7 +1203,7 @@ vchiq_open(struct cdev *dev, int flags, int fmt __unused, 
struct thread *td)
                INIT_LIST_HEAD(&instance->bulk_waiter_list);
 
                devfs_set_cdevpriv(instance, instance_dtr);
-       } 
+       }
        else {
                vchiq_log_error(vchiq_arm_log_level,
                        "Unknown minor device");
@@ -1222,143 +1219,151 @@ vchiq_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
 *
 ***************************************************************************/
 
+
 static int
-vchiq_close(struct cdev *dev, int flags __unused, int fmt __unused,
-                struct thread *td)
+_vchiq_close_instance(VCHIQ_INSTANCE_T instance)
 {
        int ret = 0;
-       if (1) {
-               VCHIQ_INSTANCE_T instance;
-               VCHIQ_STATE_T *state = vchiq_get_state();
-               VCHIQ_SERVICE_T *service;
-               int i;
-
-               if ((ret = devfs_get_cdevpriv((void**)&instance))) {
-                       printf("devfs_get_cdevpriv failed: error %d\n", ret);
-                       return (ret);
-               }
-
-               vchiq_log_info(vchiq_arm_log_level,
-                       "vchiq_release: instance=%lx",
-                       (unsigned long)instance);
-
-               if (!state) {
-                       ret = -EPERM;
-                       goto out;
-               }
+       VCHIQ_STATE_T *state = vchiq_get_state();
+       VCHIQ_SERVICE_T *service;
+       int i;
 
-               /* Ensure videocore is awake to allow termination. */
-               vchiq_use_internal(instance->state, NULL,
-                               USE_TYPE_VCHIQ);
+       vchiq_log_info(vchiq_arm_log_level,
+               "vchiq_release: instance=%lx",
+               (unsigned long)instance);
 
-               lmutex_lock(&instance->completion_mutex);
+       if (!state) {
+               ret = -EPERM;
+               goto out;
+       }
 
-               /* Wake the completion thread and ask it to exit */
-               instance->closing = 1;
-               up(&instance->insert_event);
+       /* Ensure videocore is awake to allow termination. */
+       vchiq_use_internal(instance->state, NULL,
+                       USE_TYPE_VCHIQ);
 
-               lmutex_unlock(&instance->completion_mutex);
+       lmutex_lock(&instance->completion_mutex);
 
-               /* Wake the slot handler if the completion queue is full. */
-               up(&instance->remove_event);
+       /* Wake the completion thread and ask it to exit */
+       instance->closing = 1;
+       up(&instance->insert_event);
 
-               /* Mark all services for termination... */
-               i = 0;
-               while ((service = next_service_by_instance(state, instance,
-                       &i)) != NULL) {
-                       USER_SERVICE_T *user_service = service->base.userdata;
+       lmutex_unlock(&instance->completion_mutex);
 
-                       /* Wake the slot handler if the msg queue is full. */
-                       up(&user_service->remove_event);
+       /* Wake the slot handler if the completion queue is full. */
+       up(&instance->remove_event);
 
-                       vchiq_terminate_service_internal(service);
-                       unlock_service(service);
-               }
+       /* Mark all services for termination... */
+       i = 0;
+       while ((service = next_service_by_instance(state, instance,
+               &i)) != NULL) {
+               USER_SERVICE_T *user_service = service->base.userdata;
 
-               /* ...and wait for them to die */
-               i = 0;
-               while ((service = next_service_by_instance(state, instance, &i))
-                       != NULL) {
-                       USER_SERVICE_T *user_service = service->base.userdata;
+               /* Wake the slot handler if the msg queue is full. */
+               up(&user_service->remove_event);
 
-                       down(&service->remove_event);
+               vchiq_terminate_service_internal(service);
+               unlock_service(service);
+       }
 
-                       BUG_ON(service->srvstate != VCHIQ_SRVSTATE_FREE);
+       /* ...and wait for them to die */
+       i = 0;
+       while ((service = next_service_by_instance(state, instance, &i))
+               != NULL) {
+               USER_SERVICE_T *user_service = service->base.userdata;
 
-                       spin_lock(&msg_queue_spinlock);
+               down(&service->remove_event);
 
-                       while (user_service->msg_remove !=
-                               user_service->msg_insert) {
-                               VCHIQ_HEADER_T *header = user_service->
-                                       msg_queue[user_service->msg_remove &
-                                               (MSG_QUEUE_SIZE - 1)];
-                               user_service->msg_remove++;
-                               spin_unlock(&msg_queue_spinlock);
+               BUG_ON(service->srvstate != VCHIQ_SRVSTATE_FREE);
 
-                               if (header)
-                                       vchiq_release_message(
-                                               service->handle,
-                                               header);
-                               spin_lock(&msg_queue_spinlock);
-                       }
+               spin_lock(&msg_queue_spinlock);
 
+               while (user_service->msg_remove !=
+                       user_service->msg_insert) {
+                       VCHIQ_HEADER_T *header = user_service->
+                               msg_queue[user_service->msg_remove &
+                                       (MSG_QUEUE_SIZE - 1)];
+                       user_service->msg_remove++;
                        spin_unlock(&msg_queue_spinlock);
 
-                       unlock_service(service);
+                       if (header)
+                               vchiq_release_message(
+                                       service->handle,
+                                       header);
+                       spin_lock(&msg_queue_spinlock);
                }
 
-               /* Release any closed services */
-               while (instance->completion_remove !=
-                       instance->completion_insert) {
-                       VCHIQ_COMPLETION_DATA_T *completion;
-                       VCHIQ_SERVICE_T *service1;
-                       completion = &instance->completions[
-                               instance->completion_remove &
-                               (MAX_COMPLETIONS - 1)];
-                       service1 = completion->service_userdata;
-                       if (completion->reason == VCHIQ_SERVICE_CLOSED)
-                       {
-                               USER_SERVICE_T *user_service =
-                                       service->base.userdata;
-
-                               /* Wake any blocked user-thread */
-                               if (instance->use_close_delivered)
-                                       up(&user_service->close_event);
-                               unlock_service(service1);
-                       }
-                       instance->completion_remove++;
-               }
+               spin_unlock(&msg_queue_spinlock);
 
-               /* Release the PEER service count. */
-               vchiq_release_internal(instance->state, NULL);
+               unlock_service(service);
+       }
 
+       /* Release any closed services */
+       while (instance->completion_remove !=
+               instance->completion_insert) {
+               VCHIQ_COMPLETION_DATA_T *completion;
+               VCHIQ_SERVICE_T *service;
+               completion = &instance->completions[
+                       instance->completion_remove &
+                       (MAX_COMPLETIONS - 1)];
+               service = completion->service_userdata;
+               if (completion->reason == VCHIQ_SERVICE_CLOSED)
                {
-                       struct list_head *pos, *next;
-                       list_for_each_safe(pos, next,
-                               &instance->bulk_waiter_list) {
-                               struct bulk_waiter_node *waiter;
-                               waiter = list_entry(pos,
-                                       struct bulk_waiter_node,
-                                       list);
-                               list_del(pos);
-                               vchiq_log_info(vchiq_arm_log_level,
-                                       "bulk_waiter - cleaned up %x "
-                                       "for pid %d",
-                                       (unsigned int)waiter, waiter->pid);
-                               _sema_destroy(&waiter->bulk_waiter.event);
-                               kfree(waiter);
-                       }
-               }
+                       USER_SERVICE_T *user_service =
+                               service->base.userdata;
 
+                       /* Wake any blocked user-thread */
+                       if (instance->use_close_delivered)
+                               up(&user_service->close_event);
+
+                       unlock_service(service);
+               }
+               instance->completion_remove++;
        }
-       else {
-               vchiq_log_error(vchiq_arm_log_level,
-                       "Unknown minor device");
-               ret = -ENXIO;
+
+       /* Release the PEER service count. */
+       vchiq_release_internal(instance->state, NULL);
+
+       {
+               struct list_head *pos, *next;
+               list_for_each_safe(pos, next,
+                       &instance->bulk_waiter_list) {
+                       struct bulk_waiter_node *waiter;
+                       waiter = list_entry(pos,
+                               struct bulk_waiter_node,
+                               list);
+                       list_del(pos);
+                       vchiq_log_info(vchiq_arm_log_level,
+                               "bulk_waiter - cleaned up %zx "
+                               "for pid %d",
+                               (size_t)waiter, waiter->pid);
*** 476 LINES SKIPPED ***


Reply via email to