The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=1ecf01065b45018de3901c8bf89d703af737feeb
commit 1ecf01065b45018de3901c8bf89d703af737feeb Author: Konstantin Belousov <[email protected]> AuthorDate: 2020-12-21 17:41:34 +0000 Commit: Konstantin Belousov <[email protected]> CommitDate: 2025-12-02 03:41:13 +0000 libuvmem: usermode port of vmem(9) The quantum cache is disabled, there is no uma. Intent is to use this for resource allocation in bhyve(8), for start. Addition of -luvmem to bhyve linking was done to test changes to share/mk. Reviewed by: bnovkov, markj Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D27220 --- lib/Makefile | 1 + lib/libuvmem/Makefile | 17 +++ lib/libuvmem/Symbol.map | 15 +++ share/mk/bsd.libnames.mk | 1 + share/mk/src.libnames.mk | 5 + sys/contrib/openzfs/include/sys/zfs_context.h | 2 + sys/kern/subr_vmem.c | 151 +++++++++++++++++++++++--- sys/sys/vmem.h | 23 ++-- usr.sbin/bhyve/Makefile | 2 +- 9 files changed, 194 insertions(+), 23 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 75a2355aa452..216ba1d58473 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -107,6 +107,7 @@ SUBDIR= ${SUBDIR_BOOTSTRAP} \ libulog \ libutil \ libutil++ \ + libuvmem \ ${_libvgl} \ libwrap \ libxo \ diff --git a/lib/libuvmem/Makefile b/lib/libuvmem/Makefile new file mode 100644 index 000000000000..9d13eabd277c --- /dev/null +++ b/lib/libuvmem/Makefile @@ -0,0 +1,17 @@ +PACKAGE= runtime + +LIB= uvmem +SRCS= subr_vmem.c + +SHLIB_MAJOR= 1 +LIBADD+= pthread +CFLAGS.clang+=-Wno-thread-safety-analysis + +SYMBOL_MAPS= ${.CURDIR}/Symbol.map +VERSION_DEF= ${SRCTOP}/lib/libc/Versions.def + +.include <src.opts.mk> + +.PATH: ${SRCTOP}/sys/kern + +.include <bsd.lib.mk> diff --git a/lib/libuvmem/Symbol.map b/lib/libuvmem/Symbol.map new file mode 100644 index 000000000000..0f408460cd9c --- /dev/null +++ b/lib/libuvmem/Symbol.map @@ -0,0 +1,15 @@ +FBSD_1.9 { + vmem_add; + vmem_alloc; + vmem_create; + vmem_destroy; + vmem_free; + vmem_init; + vmem_roundup_size; + vmem_set_import; + vmem_set_limit; + 
vmem_set_reclaim; + vmem_size; + vmem_xalloc; + vmem_xfree; +}; diff --git a/share/mk/bsd.libnames.mk b/share/mk/bsd.libnames.mk index 2f099e0579b2..54bdfd301cd3 100644 --- a/share/mk/bsd.libnames.mk +++ b/share/mk/bsd.libnames.mk @@ -171,6 +171,7 @@ LIBUSB?= ${LIBDESTDIR}${LIBDIR_BASE}/libusb.a LIBUSBHID?= ${LIBDESTDIR}${LIBDIR_BASE}/libusbhid.a LIBUTIL?= ${LIBDESTDIR}${LIBDIR_BASE}/libutil.a LIBUUTIL?= ${LIBDESTDIR}${LIBDIR_BASE}/libuutil.a +LIBUVMEM?= ${LIBDESTDIR}${LIBDIR_BASE}/libuvmem.a LIBVERTO?= ${LIBDESTDIR}${LIBDIR_BASE}/libverto.a LIBVGL?= ${LIBDESTDIR}${LIBDIR_BASE}/libvgl.a LIBVMMAPI?= ${LIBDESTDIR}${LIBDIR_BASE}/libvmmapi.a diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk index fd0424a96d9b..e108cc15fb55 100644 --- a/share/mk/src.libnames.mk +++ b/share/mk/src.libnames.mk @@ -237,6 +237,7 @@ _LIBRARIES= \ usb \ usbhid \ util \ + uvmem \ uutil \ verto \ vmmapi \ @@ -491,6 +492,7 @@ _DP_fifolog= z _DP_ipf= kvm _DP_tpool= spl _DP_uutil= avl spl +_DP_uvmem= pthread _DP_zfs= md pthread rt umem util uutil m avl bsdxml crypto geom nvpair \ z zfs_core zutil _DP_zfsbootenv= zfs nvpair @@ -767,6 +769,9 @@ LIBSYS_PIC?= ${LIBSYS_PICDIR}/libsys_pic.a LIBSAMPLERATEDIR?= ${_LIB_OBJTOP}/lib/libsamplerate LIBSAMPLERATE?= ${LIBSAMPLERATEDIR}/libsamplerate${PIE_SUFFIX}.a +LIBUVMEMDIR= ${OBJTOP}/lib/libuvmem +LIBUVMEM?= ${LIBUVMEMDIR}/libuvmem${PIE_SUFFIX}.a + # Define a directory for each library. This is useful for adding -L in when # not using a --sysroot or for meta mode bootstrapping when there is no # Makefile.depend. These are sorted by directory. 
diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h index 5e571d497642..791a15196e3e 100644 --- a/sys/contrib/openzfs/include/sys/zfs_context.h +++ b/sys/contrib/openzfs/include/sys/zfs_context.h @@ -50,7 +50,9 @@ extern "C" { #include <sys/cmn_err.h> #include <sys/kmem.h> #include <sys/kmem_cache.h> +#ifndef __FreeBSD__ #include <sys/vmem.h> +#endif #include <sys/misc.h> #include <sys/taskq.h> #include <sys/param.h> diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c index 507c8ad897e9..afc327c512ce 100644 --- a/sys/kern/subr_vmem.c +++ b/sys/kern/subr_vmem.c @@ -41,6 +41,9 @@ */ #include <sys/cdefs.h> + +#ifdef _KERNEL + #include "opt_ddb.h" #include <sys/param.h> @@ -75,6 +78,28 @@ #include <vm/vm_pagequeue.h> #include <vm/uma_int.h> +#else /* _KERNEL */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/hash.h> +#include <sys/vmem.h> +#include <assert.h> +#include <errno.h> +#include <pthread.h> +#include <pthread_np.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> + +#define KASSERT(a, b) +#define MPASS(a) +#define WITNESS_WARN(a, b, c) +#define panic(...) assert(0) + +#endif /* _KERNEL */ + #define VMEM_OPTORDER 5 #define VMEM_OPTVALUE (1 << VMEM_OPTORDER) #define VMEM_MAXORDER \ @@ -87,24 +112,40 @@ #define VMEM_FITMASK (M_BESTFIT | M_FIRSTFIT | M_NEXTFIT) -#define VMEM_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | \ - M_BESTFIT | M_FIRSTFIT | M_NEXTFIT) - -#define BT_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM) - #define QC_NAME_MAX 16 /* * Data structures private to vmem. 
*/ +#ifdef _KERNEL + +#define VMEM_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | \ + M_BESTFIT | M_FIRSTFIT | M_NEXTFIT) + +#define BT_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM) + MALLOC_DEFINE(M_VMEM, "vmem", "vmem internal structures"); +#else /* _KERNEL */ + +/* bit-compat with kernel */ +#define M_ZERO 0 +#define M_NOVM 0 +#define M_USE_RESERVE 0 + +#define VMEM_FLAGS (M_NOWAIT | M_BESTFIT | M_FIRSTFIT | M_NEXTFIT) + +#define BT_FLAGS 0 + +#endif /* _KERNEL */ + typedef struct vmem_btag bt_t; TAILQ_HEAD(vmem_seglist, vmem_btag); LIST_HEAD(vmem_freelist, vmem_btag); LIST_HEAD(vmem_hashlist, vmem_btag); +#ifdef _KERNEL struct qcache { uma_zone_t qc_cache; vmem_t *qc_vmem; @@ -113,6 +154,7 @@ struct qcache { }; typedef struct qcache qcache_t; #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool->pr_qcache)) +#endif #define VMEM_NAME_MAX 16 @@ -132,8 +174,13 @@ struct vmem_btag { /* vmem arena */ struct vmem { +#ifdef _KERNEL struct mtx_padalign vm_lock; struct cv vm_cv; +#else + pthread_mutex_t vm_lock; + pthread_cond_t vm_cv; +#endif char vm_name[VMEM_NAME_MAX+1]; LIST_ENTRY(vmem) vm_alllist; struct vmem_hashlist vm_hash0[VMEM_HASHSIZE_MIN]; @@ -165,8 +212,10 @@ struct vmem { /* Space exhaustion callback. 
*/ vmem_reclaim_t *vm_reclaimfn; +#ifdef _KERNEL /* quantum cache */ qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX]; +#endif }; #define BT_TYPE_SPAN 1 /* Allocated from importfn */ @@ -178,6 +227,7 @@ struct vmem { #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size - 1) +#ifdef _KERNEL #if defined(DIAGNOSTIC) static int enable_vmem_check = 0; SYSCTL_INT(_debug, OID_AUTO, vmem_check, CTLFLAG_RWTUN, @@ -190,10 +240,17 @@ static int vmem_periodic_interval; static struct task vmem_periodic_wk; static struct mtx_padalign __exclusive_cache_line vmem_list_lock; -static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list); static uma_zone_t vmem_zone; +#else /* _KERNEL */ +static pthread_mutex_t vmem_list_lock = PTHREAD_MUTEX_INITIALIZER; + +#endif /* _KERNEL */ + +static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list); + /* ---- misc */ +#ifdef _KERNEL #define VMEM_LIST_LOCK() mtx_lock(&vmem_list_lock) #define VMEM_LIST_UNLOCK() mtx_unlock(&vmem_list_lock) @@ -207,6 +264,21 @@ static uma_zone_t vmem_zone; #define VMEM_LOCK_INIT(vm, name) mtx_init(&vm->vm_lock, (name), NULL, MTX_DEF) #define VMEM_LOCK_DESTROY(vm) mtx_destroy(&vm->vm_lock) #define VMEM_ASSERT_LOCKED(vm) mtx_assert(&vm->vm_lock, MA_OWNED); +#else /* _KERNEL */ +#define VMEM_LIST_LOCK() pthread_mutex_lock(&vmem_list_lock) +#define VMEM_LIST_UNLOCK() pthread_mutex_unlock(&vmem_list_lock) + +#define VMEM_CONDVAR_INIT(vm, wchan) pthread_cond_init(&vm->vm_cv, NULL) +#define VMEM_CONDVAR_DESTROY(vm) pthread_cond_destroy(&vm->vm_cv) +#define VMEM_CONDVAR_WAIT(vm) pthread_cond_wait(&vm->vm_cv, &vm->vm_lock) +#define VMEM_CONDVAR_BROADCAST(vm) pthread_cond_broadcast(&vm->vm_cv) + +#define VMEM_LOCK(vm) pthread_mutex_lock(&vm->vm_lock) +#define VMEM_UNLOCK(vm) pthread_mutex_unlock(&vm->vm_lock) +#define VMEM_LOCK_INIT(vm, name) pthread_mutex_init(&vm->vm_lock, NULL) +#define VMEM_LOCK_DESTROY(vm) pthread_mutex_destroy(&vm->vm_lock) +#define VMEM_ASSERT_LOCKED(vm) 
pthread_mutex_isowned_np(&vm->vm_lock) +#endif /* _KERNEL */ #define VMEM_ALIGNUP(addr, align) (-(-(addr) & -(align))) @@ -231,6 +303,7 @@ static uma_zone_t vmem_zone; */ #define BT_MAXFREE (BT_MAXALLOC * 8) +#ifdef _KERNEL /* Allocator for boundary tags. */ static uma_zone_t vmem_bt_zone; @@ -245,7 +318,8 @@ vmem_t *transient_arena = &transient_arena_storage; #ifdef DEBUG_MEMGUARD static struct vmem memguard_arena_storage; vmem_t *memguard_arena = &memguard_arena_storage; -#endif +#endif /* DEBUG_MEMGUARD */ +#endif /* _KERNEL */ static bool bt_isbusy(bt_t *bt) @@ -265,12 +339,13 @@ bt_isfree(bt_t *bt) * at least the maximum possible tag allocations in the arena. */ static __noinline int -_bt_fill(vmem_t *vm, int flags) +_bt_fill(vmem_t *vm, int flags __unused) { bt_t *bt; VMEM_ASSERT_LOCKED(vm); +#ifdef _KERNEL /* * Only allow the kernel arena and arenas derived from kernel arena to * dip into reserve tags. They are where new tags come from. @@ -278,6 +353,7 @@ _bt_fill(vmem_t *vm, int flags) flags &= BT_FLAGS; if (vm != kernel_arena && vm->vm_arg != kernel_arena) flags &= ~M_USE_RESERVE; +#endif /* * Loop until we meet the reserve. To minimize the lock shuffle @@ -286,12 +362,18 @@ _bt_fill(vmem_t *vm, int flags) * holding a vmem lock. */ while (vm->vm_nfreetags < BT_MAXALLOC) { +#ifdef _KERNEL bt = uma_zalloc(vmem_bt_zone, (flags & M_USE_RESERVE) | M_NOWAIT | M_NOVM); +#else + bt = malloc(sizeof(struct vmem_btag)); +#endif if (bt == NULL) { +#ifdef _KERNEL VMEM_UNLOCK(vm); bt = uma_zalloc(vmem_bt_zone, flags); VMEM_LOCK(vm); +#endif if (bt == NULL) break; } @@ -351,7 +433,11 @@ bt_freetrim(vmem_t *vm, int freelimit) VMEM_UNLOCK(vm); while ((bt = LIST_FIRST(&freetags)) != NULL) { LIST_REMOVE(bt, bt_freelist); +#ifdef _KERNEL uma_zfree(vmem_bt_zone, bt); +#else + free(bt); +#endif } } @@ -538,6 +624,7 @@ bt_insfree(vmem_t *vm, bt_t *bt) /* ---- vmem internal functions */ +#ifdef _KERNEL /* * Import from the arena into the quantum cache in UMA. 
* @@ -722,8 +809,6 @@ vmem_startup(void) #endif } -/* ---- rehash */ - static int vmem_rehash(vmem_t *vm, vmem_size_t newhashsize) { @@ -821,6 +906,7 @@ vmem_start_callout(void *unused) vmem_periodic_kick, NULL); } SYSINIT(vfs, SI_SUB_CONFIGURE, SI_ORDER_ANY, vmem_start_callout, NULL); +#endif /* _KERNEL */ static void vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type) @@ -876,10 +962,12 @@ vmem_destroy1(vmem_t *vm) { bt_t *bt; +#ifdef _KERNEL /* * Drain per-cpu quantum caches. */ qc_destroy(vm); +#endif /* * The vmem should now only contain empty segments. @@ -891,14 +979,23 @@ vmem_destroy1(vmem_t *vm) while ((bt = TAILQ_FIRST(&vm->vm_seglist)) != NULL) bt_remseg(vm, bt); - if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0) + if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0) { +#ifdef _KERNEL free(vm->vm_hashlist, M_VMEM); +#else + free(vm->vm_hashlist); +#endif + } bt_freetrim(vm, 0); VMEM_CONDVAR_DESTROY(vm); VMEM_LOCK_DESTROY(vm); +#ifdef _KERNEL uma_zfree(vmem_zone, vm); +#else + free(vm); +#endif } static int @@ -1055,8 +1152,10 @@ vmem_try_fetch(vmem_t *vm, const vmem_size_t size, vmem_size_t align, int flags) avail = vm->vm_size - vm->vm_inuse; bt_save(vm); VMEM_UNLOCK(vm); +#ifdef _KERNEL if (vm->vm_qcache_max != 0) qc_drain(vm); +#endif if (vm->vm_reclaimfn != NULL) vm->vm_reclaimfn(vm, flags); VMEM_LOCK(vm); @@ -1236,8 +1335,14 @@ vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size, { vmem_size_t i; +#ifdef _KERNEL MPASS(quantum > 0); MPASS((quantum & (quantum - 1)) == 0); +#else + assert(quantum == 0); + assert(qcache_max == 0); + quantum = 1; +#endif bzero(vm, sizeof(*vm)); @@ -1252,7 +1357,11 @@ vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size, vm->vm_size = 0; vm->vm_limit = 0; vm->vm_inuse = 0; +#ifdef _KERNEL qc_init(vm, qcache_max); +#else + (void)qcache_max; +#endif TAILQ_INIT(&vm->vm_seglist); vm->vm_cursor.bt_start = vm->vm_cursor.bt_size = 0; @@ 
-1290,7 +1399,13 @@ vmem_create(const char *name, vmem_addr_t base, vmem_size_t size, vmem_t *vm; +#ifdef _KERNEL vm = uma_zalloc(vmem_zone, flags & (M_WAITOK|M_NOWAIT)); +#else + assert(quantum == 0); + assert(qcache_max == 0); + vm = malloc(sizeof(vmem_t)); +#endif if (vm == NULL) return (NULL); if (vmem_init(vm, name, base, size, quantum, qcache_max, @@ -1302,7 +1417,6 @@ vmem_create(const char *name, vmem_addr_t base, vmem_size_t size, void vmem_destroy(vmem_t *vm) { - VMEM_LIST_LOCK(); LIST_REMOVE(vm, vm_alllist); VMEM_LIST_UNLOCK(); @@ -1324,7 +1438,6 @@ int vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp) { const int strat __unused = flags & VMEM_FITMASK; - qcache_t *qc; flags &= VMEM_FLAGS; MPASS(size > 0); @@ -1332,7 +1445,10 @@ vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp) if ((flags & M_NOWAIT) == 0) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc"); +#ifdef _KERNEL if (size <= vm->vm_qcache_max) { + qcache_t *qc; + /* * Resource 0 cannot be cached, so avoid a blocking allocation * in qc_import() and give the vmem_xalloc() call below a chance @@ -1344,6 +1460,7 @@ vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp) if (__predict_true(*addrp != 0)) return (0); } +#endif return (vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, addrp)); @@ -1463,14 +1580,17 @@ out: void vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) { - qcache_t *qc; MPASS(size > 0); +#ifdef _KERNEL if (size <= vm->vm_qcache_max && __predict_true(addr >= VMEM_ADDR_QCACHE_MIN)) { + qcache_t *qc; + qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift]; uma_zfree(qc->qc_cache, (void *)addr); } else +#endif vmem_xfree(vm, addr, size); } @@ -1564,11 +1684,13 @@ vmem_size(vmem_t *vm, int typemask) return (0); default: panic("vmem_size"); + return (0); } } /* ---- debug */ +#ifdef _KERNEL #if defined(DDB) || defined(DIAGNOSTIC) static void bt_dump(const bt_t *, int (*)(const char *, 
...) @@ -1820,3 +1942,4 @@ vmem_check(vmem_t *vm) } #endif /* defined(DIAGNOSTIC) */ +#endif /* _KERNEL */ diff --git a/sys/sys/vmem.h b/sys/sys/vmem.h index 20eee9f6ccd3..daceac796722 100644 --- a/sys/sys/vmem.h +++ b/sys/sys/vmem.h @@ -33,8 +33,6 @@ #include <sys/types.h> -#ifdef _KERNEL - typedef struct vmem vmem_t; typedef uintptr_t vmem_addr_t; @@ -44,10 +42,24 @@ typedef size_t vmem_size_t; #define VMEM_ADDR_QCACHE_MIN 1 #define VMEM_ADDR_MAX (~(vmem_addr_t)0) +/* vmem_size typemask */ +#define VMEM_ALLOC 0x01 +#define VMEM_FREE 0x02 +#define VMEM_MAXFREE 0x10 + typedef int (vmem_import_t)(void *, vmem_size_t, int, vmem_addr_t *); typedef void (vmem_release_t)(void *, vmem_addr_t, vmem_size_t); typedef void (vmem_reclaim_t)(vmem_t *, int); +#ifndef _KERNEL +#define M_NOWAIT 0x0800 /* userspace hack */ +#define M_FIRSTFIT 0x1000 /* only for vmem, fast fit */ +#define M_BESTFIT 0x2000 /* only for vmem, low fragmentation */ +#define M_NEXTFIT 0x8000 /* only for vmem, follow cursor */ +#endif + +__BEGIN_DECLS + /* * Create a vmem: * name - Name of the region @@ -134,11 +146,6 @@ void vmem_printall(const char *, int (*fn)(const char *, ...) __printflike(1, 2)); void vmem_startup(void); -/* vmem_size typemask */ -#define VMEM_ALLOC 0x01 -#define VMEM_FREE 0x02 -#define VMEM_MAXFREE 0x10 - -#endif /* _KERNEL */ +__END_DECLS #endif /* !_SYS_VMEM_H_ */ diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index c454a280edba..25fb71b5768f 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -97,7 +97,7 @@ CFLAGS+=-I${.CURDIR} \ -I${.CURDIR}/../../contrib/lib9p \ -I${SRCTOP}/sys -LIBADD+= vmmapi md nv pthread z util sbuf cam 9p +LIBADD+= vmmapi md nv uvmem pthread z util sbuf cam 9p .if ${MK_BHYVE_SNAPSHOT} != "no" LIBADD+= ucl xo
