The branch main has been updated by khng:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=454bc887f250ce0bceaabd0ec624d077269d3220

commit 454bc887f250ce0bceaabd0ec624d077269d3220
Author:     Ka Ho Ng <[email protected]>
AuthorDate: 2021-08-12 15:01:02 +0000
Commit:     Ka Ho Ng <[email protected]>
CommitDate: 2021-08-12 15:04:18 +0000

    uipc_shm: Implement fspacectl(2) support
    
    This implements fspacectl(2) support for shared memory objects. The
    semantics of SPACECTL_DEALLOC are equivalent to clearing the backing
    store and freeing the pages within the affected range. If the call
    succeeds, subsequent reads of the affected range return all zeroes.
    
    tests/sys/posixshm/posixshm_test.c is expanded to include an
    fspacectl(2) functional test.
    
    Sponsored by:   The FreeBSD Foundation
    Reviewed by:    kevans, kib
    Differential Revision:  https://reviews.freebsd.org/D31490
---
 sys/kern/uipc_shm.c                | 197 ++++++++++++++++++++++++++++--------
 tests/sys/posixshm/posixshm_test.c | 199 +++++++++++++++++++++++++++++++++++++
 2 files changed, 354 insertions(+), 42 deletions(-)

diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 58c9f8cec239..16d1e22a898b 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -131,6 +131,8 @@ static int  shm_dotruncate_locked(struct shmfd *shmfd, off_t length,
     void *rl_cookie);
 static int     shm_copyin_path(struct thread *td, const char *userpath_in,
     char **path_out);
+static int     shm_deallocate(struct shmfd *shmfd, off_t *offset,
+    off_t *length, int flags);
 
 static fo_rdwr_t       shm_read;
 static fo_rdwr_t       shm_write;
@@ -146,6 +148,7 @@ static fo_mmap_t    shm_mmap;
 static fo_get_seals_t  shm_get_seals;
 static fo_add_seals_t  shm_add_seals;
 static fo_fallocate_t  shm_fallocate;
+static fo_fspacectl_t  shm_fspacectl;
 
 /* File descriptor operations. */
 struct fileops shm_ops = {
@@ -166,6 +169,7 @@ struct fileops shm_ops = {
        .fo_get_seals = shm_get_seals,
        .fo_add_seals = shm_add_seals,
        .fo_fallocate = shm_fallocate,
+       .fo_fspacectl = shm_fspacectl,
        .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE,
 };
 
@@ -626,14 +630,64 @@ out:
        return (error);
 }
 
+static int
+shm_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
+    int end)
+{
+       vm_page_t m;
+       int rv;
+
+       VM_OBJECT_ASSERT_WLOCKED(object);
+       KASSERT(base >= 0, ("%s: base %d", __func__, base));
+       KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
+           end));
+
+retry:
+       m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
+       if (m != NULL) {
+               MPASS(vm_page_all_valid(m));
+       } else if (vm_pager_has_page(object, idx, NULL, NULL)) {
+               m = vm_page_alloc(object, idx,
+                   VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
+               if (m == NULL)
+                       goto retry;
+               vm_object_pip_add(object, 1);
+               VM_OBJECT_WUNLOCK(object);
+               rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
+               VM_OBJECT_WLOCK(object);
+               vm_object_pip_wakeup(object);
+               if (rv == VM_PAGER_OK) {
+                       /*
+                        * Since the page was not resident, and therefore not
+                        * recently accessed, immediately enqueue it for
+                        * asynchronous laundering.  The current operation is
+                        * not regarded as an access.
+                        */
+                       vm_page_launder(m);
+               } else {
+                       vm_page_free(m);
+                       VM_OBJECT_WUNLOCK(object);
+                       return (EIO);
+               }
+       }
+       if (m != NULL) {
+               pmap_zero_page_area(m, base, end - base);
+               KASSERT(vm_page_all_valid(m), ("%s: page %p is invalid",
+                   __func__, m));
+               vm_page_set_dirty(m);
+               vm_page_xunbusy(m);
+       }
+
+       return (0);
+}
+
 static int
 shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
 {
        vm_object_t object;
-       vm_page_t m;
-       vm_pindex_t idx, nobjsize;
+       vm_pindex_t nobjsize;
        vm_ooffset_t delta;
-       int base, rv;
+       int base, error;
 
        KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
        object = shmfd->shm_object;
@@ -660,45 +714,10 @@ shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
                 */
                base = length & PAGE_MASK;
                if (base != 0) {
-                       idx = OFF_TO_IDX(length);
-retry:
-                       m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
-                       if (m != NULL) {
-                               MPASS(vm_page_all_valid(m));
-                       } else if (vm_pager_has_page(object, idx, NULL, NULL)) {
-                               m = vm_page_alloc(object, idx,
-                                   VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
-                               if (m == NULL)
-                                       goto retry;
-                               vm_object_pip_add(object, 1);
-                               VM_OBJECT_WUNLOCK(object);
-                               rv = vm_pager_get_pages(object, &m, 1, NULL,
-                                   NULL);
-                               VM_OBJECT_WLOCK(object);
-                               vm_object_pip_wakeup(object);
-                               if (rv == VM_PAGER_OK) {
-                                       /*
-                                        * Since the page was not resident,
-                                        * and therefore not recently
-                                        * accessed, immediately enqueue it
-                                        * for asynchronous laundering.  The
-                                        * current operation is not regarded
-                                        * as an access.
-                                        */
-                                       vm_page_launder(m);
-                               } else {
-                                       vm_page_free(m);
-                                       VM_OBJECT_WUNLOCK(object);
-                                       return (EIO);
-                               }
-                       }
-                       if (m != NULL) {
-                               pmap_zero_page_area(m, base, PAGE_SIZE - base);
-                               KASSERT(vm_page_all_valid(m),
-                                   ("shm_dotruncate: page %p is invalid", m));
-                               vm_page_set_dirty(m);
-                               vm_page_xunbusy(m);
-                       }
+                       error = shm_partial_page_invalidate(object,
+                           OFF_TO_IDX(length), base, PAGE_SIZE);
+                       if (error)
+                               return (error);
                }
                delta = IDX_TO_OFF(object->size - nobjsize);
 
@@ -1874,6 +1893,100 @@ shm_get_seals(struct file *fp, int *seals)
        return (0);
 }
 
+static int
+shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags)
+{
+       vm_object_t object;
+       vm_pindex_t pistart, pi, piend;
+       vm_ooffset_t off, len;
+       int startofs, endofs, end;
+       int error;
+
+       off = *offset;
+       len = *length;
+       KASSERT(off + len <= (vm_ooffset_t)OFF_MAX, ("off + len overflows"));
+       object = shmfd->shm_object;
+       startofs = off & PAGE_MASK;
+       endofs = (off + len) & PAGE_MASK;
+       pistart = OFF_TO_IDX(off);
+       piend = OFF_TO_IDX(off + len);
+       pi = OFF_TO_IDX(off + PAGE_MASK);
+       error = 0;
+
+       VM_OBJECT_WLOCK(object);
+
+       if (startofs != 0) {
+               end = pistart != piend ? PAGE_SIZE : endofs;
+               error = shm_partial_page_invalidate(object, pistart, startofs,
+                   end);
+               if (error)
+                       goto out;
+               off += end - startofs;
+               len -= end - startofs;
+       }
+
+       if (pi < piend) {
+               vm_object_page_remove(object, pi, piend, 0);
+               off += IDX_TO_OFF(piend - pi);
+               len -= IDX_TO_OFF(piend - pi);
+       }
+
+       if (endofs != 0 && pistart != piend) {
+               error = shm_partial_page_invalidate(object, piend, 0, endofs);
+               if (error)
+                       goto out;
+               off += endofs;
+               len -= endofs;
+       }
+
+out:
+       VM_OBJECT_WUNLOCK(shmfd->shm_object);
+       *offset = off;
+       *length = len;
+       return (error);
+}
+
+static int
+shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags,
+    struct ucred *active_cred, struct thread *td)
+{
+       void *rl_cookie;
+       struct shmfd *shmfd;
+       off_t off, len;
+       int error;
+
+       /* This assumes that the caller already checked for overflow. */
+       error = EINVAL;
+       shmfd = fp->f_data;
+       off = *offset;
+       len = *length;
+
+       if (cmd != SPACECTL_DEALLOC || off < 0 || len <= 0 ||
+           len > OFF_MAX - off || flags != 0)
+               return (EINVAL);
+
+       rl_cookie = rangelock_wlock(&shmfd->shm_rl, off, off + len,
+           &shmfd->shm_mtx);
+       switch (cmd) {
+       case SPACECTL_DEALLOC:
+               if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) {
+                       error = EPERM;
+                       break;
+               }
+               error = shm_deallocate(shmfd, &off, &len, flags);
+               if (error != 0)
+                       break;
+               *offset = off;
+               *length = len;
+               break;
+       default:
+               __assert_unreachable();
+       }
+       rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
+       return (error);
+}
+
+
 static int
 shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
 {
diff --git a/tests/sys/posixshm/posixshm_test.c b/tests/sys/posixshm/posixshm_test.c
index d1c1b14aef65..eddb1d2d8250 100644
--- a/tests/sys/posixshm/posixshm_test.c
+++ b/tests/sys/posixshm/posixshm_test.c
@@ -2,6 +2,11 @@
  * Copyright (c) 2006 Robert N. M. Watson
  * All rights reserved.
  *
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Ka Ho Ng
+ * under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -173,6 +178,126 @@ verify_object(const char *path, char expected_value)
        close(fd);
 }
 
+static off_t shm_max_pages = 32;
+static const char byte_to_fill = 0x5f;
+
+static int
+shm_fill(int fd, off_t offset, off_t len)
+{
+       int error;
+       size_t blen;
+       char *buf;
+       error = 0;
+       buf = malloc(PAGE_SIZE);
+       if (buf == NULL)
+               return (1);
+
+       while (len > 0) {
+               blen = len < (off_t)PAGE_SIZE ? len : PAGE_SIZE;
+               memset(buf, byte_to_fill, blen);
+               if (pwrite(fd, buf, blen, offset) != (ssize_t)blen) {
+                       error = 1;
+                       break;
+               }
+               len -= blen;
+               offset += blen;
+       }
+
+       free(buf);
+       return (error);
+}
+
+static int
+check_content_dealloc(int fd, off_t hole_start, off_t hole_len, off_t shm_sz)
+{
+       int error;
+       size_t blen;
+       off_t offset, resid;
+       struct stat statbuf;
+       char *buf, *sblk;
+
+       error = 0;
+       buf = malloc(PAGE_SIZE * 2);
+       if (buf == NULL)
+               return (1);
+       sblk = buf + PAGE_SIZE;
+
+       memset(sblk, 0, PAGE_SIZE);
+
+       if ((uint64_t)hole_start + hole_len > (uint64_t)shm_sz)
+               hole_len = shm_sz - hole_start;
+
+       /*
+        * Check hole is zeroed.
+        */
+       offset = hole_start;
+       resid = hole_len;
+       while (resid > 0) {
+               blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
+               if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
+                       error = 1;
+                       break;
+               }
+               if (memcmp(buf, sblk, blen) != 0) {
+                       error = 1;
+                       break;
+               }
+               resid -= blen;
+               offset += blen;
+       }
+
+       memset(sblk, byte_to_fill, PAGE_SIZE);
+
+       /*
+        * Check file region before hole is still filled.
+        */
+       offset = 0;
+       resid = hole_start;
+       while (resid > 0) {
+               blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
+               if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
+                       error = 1;
+                       break;
+               }
+               if (memcmp(buf, sblk, blen) != 0) {
+                       error = 1;
+                       break;
+               }
+               resid -= blen;
+               offset += blen;
+       }
+
+       /*
+        * Check file region after hole is still filled.
+        */
+       offset = hole_start + hole_len;
+       resid = shm_sz - offset;
+       while (resid > 0) {
+               blen = resid < (off_t)PAGE_SIZE ? resid : PAGE_SIZE;
+               if (pread(fd, buf, blen, offset) != (ssize_t)blen) {
+                       error = 1;
+                       break;
+               }
+               if (memcmp(buf, sblk, blen) != 0) {
+                       error = 1;
+                       break;
+               }
+               resid -= blen;
+               offset += blen;
+       }
+
+       /*
+        * Check file size matches with expected file size.
+        */
+       if (fstat(fd, &statbuf) == -1)
+               error = -1;
+       if (statbuf.st_size != shm_sz)
+               error = -1;
+
+       free(buf);
+       return (error);
+}
+
 ATF_TC_WITHOUT_HEAD(remap_object);
 ATF_TC_BODY(remap_object, tc)
 {
@@ -958,6 +1083,79 @@ ATF_TC_BODY(fallocate, tc)
        close(fd);
 }
 
+ATF_TC_WITHOUT_HEAD(fspacectl);
+ATF_TC_BODY(fspacectl, tc)
+{
+       struct spacectl_range range;
+       off_t offset, length, shm_sz;
+       int fd, error;
+
+       shm_sz = shm_max_pages << PAGE_SHIFT;
+
+       fd = shm_open("/testtest", O_RDWR|O_CREAT, 0666);
+       ATF_REQUIRE_MSG(fd >= 0, "shm_open failed; errno:%d", errno);
+       ATF_REQUIRE_MSG((error = posix_fallocate(fd, 0, shm_sz)) == 0,
+           "posix_fallocate failed; error=%d", error);
+
+       /* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
+       ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+       range.r_offset = offset = PAGE_SIZE;
+       range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) -
+           range.r_offset;
+       ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+           "Aligned fspacectl failed; errno=%d", errno);
+       ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+           "Aligned fspacectl content checking failed", errno);
+
+       /* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
+       ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+       range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+       range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) +
+           (1 << (PAGE_SHIFT - 1)) - offset;
+       ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+           "Unaligned fspacectl failed; errno=%d", errno);
+       ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+           "Unaligned fspacectl content checking failed", errno);
+
+       /* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX */
+       ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+       range.r_offset = offset = PAGE_SHIFT;
+       range.r_len = length = OFF_MAX - offset;
+       ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+           "Aligned fspacectl to OFF_MAX failed; errno=%d", errno);
+       ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+           "Aligned fspacectl to OFF_MAX content checking failed", errno);
+
+       /* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX */
+       ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+       range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+       range.r_len = length = OFF_MAX - offset;
+       ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+           "Unaligned fspacectl to OFF_MAX failed; errno=%d", errno);
+       ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+           "Unaligned fspacectl to OFF_MAX content checking failed", errno);
+
+       /* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
+       ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+       range.r_offset = offset = PAGE_SIZE;
+       range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
+       ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+           "Aligned fspacectl past shm_sz failed; errno=%d", errno);
+       ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+           "Aligned fspacectl past shm_sz content checking failed", errno);
+
+       /* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
+       ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+       range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+       range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
+       ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+           "Unaligned fspacectl past shm_sz failed; errno=%d", errno);
+       ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+           "Unaligned fspacectl past shm_sz content checking failed", errno);
+
+       ATF_REQUIRE(close(fd) == 0);
+}
+
 static int
 shm_open_large(int psind, int policy, size_t sz)
 {
@@ -1704,6 +1902,7 @@ ATF_TP_ADD_TCS(tp)
        ATF_TP_ADD_TC(tp, cloexec);
        ATF_TP_ADD_TC(tp, mode);
        ATF_TP_ADD_TC(tp, fallocate);
+       ATF_TP_ADD_TC(tp, fspacectl);
        ATF_TP_ADD_TC(tp, largepage_basic);
        ATF_TP_ADD_TC(tp, largepage_config);
        ATF_TP_ADD_TC(tp, largepage_mmap);
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to