Author: jhb
Date: Fri Aug 16 21:13:55 2013
New Revision: 254430
URL: http://svnweb.freebsd.org/changeset/base/254430

Log:
  Add new mmap(2) flags to permit applications to request specific virtual
  address alignment of mappings.
  - MAP_ALIGNED(n) requests a mapping aligned on a boundary of (1 << n).
    Requests where n is >= the number of bits in a pointer, or where the
    resulting alignment is smaller than a page, fail with EINVAL.  This
    matches the API provided by NetBSD.
  - MAP_ALIGNED_SUPER is a special case of MAP_ALIGNED.  It can be used
    to optimize the chances of using large pages.  By default it will align
    the mapping on a large page boundary (the system is free to choose any
    large page size to align to that seems best for the mapping request).
    However, if the object being mapped is already using large pages, then
    it will align the virtual mapping to match the existing large pages in
    the object instead.
  - Internally, VMFS_ALIGNED_SPACE is now renamed to VMFS_SUPER_SPACE, and
    VMFS_ALIGNED_SPACE(n) is repurposed for specifying a specific alignment.
    MAP_ALIGNED(n) maps to using VMFS_ALIGNED_SPACE(n), while
    MAP_ALIGNED_SUPER maps to VMFS_SUPER_SPACE.
  - mmap() of a device object now uses VMFS_OPTIMAL_SPACE rather than
    explicitly using VMFS_SUPER_SPACE.  All device objects are forced to
    use a specific color on creation, so VMFS_OPTIMAL_SPACE is effectively
    equivalent.
  
  Reviewed by:  alc
  MFC after:    1 month

Modified:
  head/lib/libc/sys/mmap.2
  head/sys/sys/mman.h
  head/sys/vm/vm_init.c
  head/sys/vm/vm_kern.c
  head/sys/vm/vm_map.c
  head/sys/vm/vm_map.h
  head/sys/vm/vm_mmap.c
  head/usr.bin/kdump/mksubr
  head/usr.bin/truss/syscalls.c

Modified: head/lib/libc/sys/mmap.2
==============================================================================
--- head/lib/libc/sys/mmap.2    Fri Aug 16 21:04:58 2013        (r254429)
+++ head/lib/libc/sys/mmap.2    Fri Aug 16 21:13:55 2013        (r254430)
@@ -28,7 +28,7 @@
 .\"    @(#)mmap.2      8.4 (Berkeley) 5/11/95
 .\" $FreeBSD$
 .\"
-.Dd March 18, 2012
+.Dd August 16, 2013
 .Dt MMAP 2
 .Os
 .Sh NAME
@@ -97,7 +97,30 @@ Sharing, mapping type and options are sp
 argument by
 .Em or Ns 'ing
 the following values:
-.Bl -tag -width MAP_HASSEMAPHORE
+.Bl -tag -width MAP_PREFAULT_READ
+.It Dv MAP_ALIGNED Ns Pq Fa n
+Align the region on a requested boundary.
+If a suitable region cannot be found,
+.Fn mmap
+will fail.
+The
+.Fa n
+argument specifies the binary logarithm of the desired alignment.
+.It Dv MAP_ALIGNED_SUPER
+Align the region to maximize the potential use of large
+.Pq Dq super
+pages.
+If a suitable region cannot be found,
+.Fn mmap
+will fail.
+The system will choose a suitable page size based on the size of the
+mapping.
+The page size used as well as the alignment of the region may both be
+affected by properties of the file being mapped.
+In particular,
+the physical address of existing pages of a file may require a specific
+alignment.
+The region is not guaranteed to be aligned on any specific boundary.
 .It Dv MAP_ANON
 Map anonymous memory not associated with any specific file.
 The file descriptor used for creating
@@ -274,6 +297,25 @@ Although this implementation does not im
 the
 .Fa offset
 argument, a portable program must only use page-aligned values.
+.Pp
+Large page mappings require that the pages backing an object be
+aligned in matching blocks in both the virtual address space and RAM.
+The system will automatically attempt to use large page mappings when
+mapping an object that is already backed by large pages in RAM by
+aligning the mapping request in the virtual address space to match the
+alignment of the large physical pages.
+The system may also use large page mappings when mapping portions of an
+object that are not yet backed by pages in RAM.
+The
+.Dv MAP_ALIGNED_SUPER
+flag is an optimization that will align the mapping request to the
+size of a large page similar to
+.Dv MAP_ALIGNED ,
+except that the system will override this alignment if an object already
+uses large pages so that the mapping will be consistent with the existing
+large pages.
+This flag is mostly useful for maximizing the use of large pages on the
+first mapping of objects that do not yet have pages present in RAM.
 .Sh RETURN VALUES
 Upon successful completion,
 .Fn mmap
@@ -325,6 +367,10 @@ The
 argument
 was equal to zero.
 .It Bq Er EINVAL
+.Dv MAP_ALIGNED
+was specified and the desired alignment was either larger than the
+virtual address size of the machine or smaller than a page.
+.It Bq Er EINVAL
 .Dv MAP_ANON
 was specified and the
 .Fa fd
@@ -356,7 +402,8 @@ was specified and insufficient memory wa
 .Xr msync 2 ,
 .Xr munlock 2 ,
 .Xr munmap 2 ,
-.Xr getpagesize 3
+.Xr getpagesize 3 ,
+.Xr getpagesizes 3
 .Sh BUGS
 The
 .Fa len

Modified: head/sys/sys/mman.h
==============================================================================
--- head/sys/sys/mman.h Fri Aug 16 21:04:58 2013        (r254429)
+++ head/sys/sys/mman.h Fri Aug 16 21:13:55 2013        (r254430)
@@ -91,6 +91,17 @@
  */
 #define        MAP_NOCORE       0x00020000 /* dont include these pages in a coredump */
 #define        MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */
+
+/*
+ * Request specific alignment (n == log2 of the desired alignment).
+ *
+ * MAP_ALIGNED_SUPER requests optimal superpage alignment, but does
+ * not enforce a specific alignment.
+ */
+#define        MAP_ALIGNED(n)   ((n) << MAP_ALIGNMENT_SHIFT)
+#define        MAP_ALIGNMENT_SHIFT     24
+#define        MAP_ALIGNMENT_MASK      MAP_ALIGNED(0xff)
+#define        MAP_ALIGNED_SUPER       MAP_ALIGNED(1) /* align on a superpage */
 #endif /* __BSD_VISIBLE */
 
 #if __POSIX_VISIBLE >= 199309

Modified: head/sys/vm/vm_init.c
==============================================================================
--- head/sys/vm/vm_init.c       Fri Aug 16 21:04:58 2013        (r254429)
+++ head/sys/vm/vm_init.c       Fri Aug 16 21:13:55 2013        (r254430)
@@ -112,7 +112,7 @@ kva_import(void *unused, vmem_size_t siz
  
        addr = vm_map_min(kernel_map);
        result = vm_map_find(kernel_map, NULL, 0, &addr, size,
-           VMFS_ALIGNED_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+           VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
        if (result != KERN_SUCCESS)
                 return (ENOMEM);
 

Modified: head/sys/vm/vm_kern.c
==============================================================================
--- head/sys/vm/vm_kern.c       Fri Aug 16 21:04:58 2013        (r254429)
+++ head/sys/vm/vm_kern.c       Fri Aug 16 21:13:55 2013        (r254430)
@@ -286,7 +286,7 @@ kmem_suballoc(vm_map_t parent, vm_offset
 
        *min = vm_map_min(parent);
        ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ?
-           VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
+           VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
            MAP_ACC_NO_CHARGE);
        if (ret != KERN_SUCCESS)
                panic("kmem_suballoc: bad status return of %d", ret);

Modified: head/sys/vm/vm_map.c
==============================================================================
--- head/sys/vm/vm_map.c        Fri Aug 16 21:04:58 2013        (r254429)
+++ head/sys/vm/vm_map.c        Fri Aug 16 21:13:55 2013        (r254430)
@@ -1434,12 +1434,17 @@ vm_map_find(vm_map_t map, vm_object_t ob
            vm_size_t length, int find_space, vm_prot_t prot,
            vm_prot_t max, int cow)
 {
-       vm_offset_t start, initial_addr;
+       vm_offset_t alignment, initial_addr, start;
        int result;
 
        if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
            (object->flags & OBJ_COLORED) == 0))
-                       find_space = VMFS_ANY_SPACE;
+               find_space = VMFS_ANY_SPACE;
+       if (find_space >> 8 != 0) {
+               KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
+               alignment = (vm_offset_t)1 << (find_space >> 8);
+       } else
+               alignment = 0;
        initial_addr = *addr;
 again:
        start = initial_addr;
@@ -1455,12 +1460,18 @@ again:
                                return (KERN_NO_SPACE);
                        }
                        switch (find_space) {
-                       case VMFS_ALIGNED_SPACE:
+                       case VMFS_SUPER_SPACE:
                        case VMFS_OPTIMAL_SPACE:
                                pmap_align_superpage(object, offset, addr,
                                    length);
                                break;
+                       case VMFS_ANY_SPACE:
+                               break;
                        default:
+                               if ((*addr & (alignment - 1)) != 0) {
+                                       *addr &= ~(alignment - 1);
+                                       *addr += alignment;
+                               }
                                break;
                        }
 
@@ -1468,8 +1479,8 @@ again:
                }
                result = vm_map_insert(map, object, offset, start, start +
                    length, prot, max, cow);
-       } while (result == KERN_NO_SPACE && (find_space == VMFS_ALIGNED_SPACE ||
-           find_space == VMFS_OPTIMAL_SPACE));
+       } while (result == KERN_NO_SPACE && find_space != VMFS_NO_SPACE &&
+           find_space != VMFS_ANY_SPACE);
        vm_map_unlock(map);
        return (result);
 }

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h        Fri Aug 16 21:04:58 2013        (r254429)
+++ head/sys/vm/vm_map.h        Fri Aug 16 21:13:55 2013        (r254430)
@@ -339,12 +339,16 @@ long vmspace_resident_count(struct vmspa
 #define        VM_FAULT_READ_AHEAD_MAX         min(atop(MAXPHYS) - 1, UINT8_MAX)
 
 /*
- * The following "find_space" options are supported by vm_map_find()
+ * The following "find_space" options are supported by vm_map_find().
+ *
+ * For VMFS_ALIGNED_SPACE, the desired alignment is specified to
+ * the macro argument as log base 2 of the desired alignment.
  */
 #define        VMFS_NO_SPACE           0       /* don't find; use the given range */
 #define        VMFS_ANY_SPACE          1       /* find a range with any alignment */
 #define        VMFS_OPTIMAL_SPACE      2       /* find a range with optimal alignment*/
-#define        VMFS_ALIGNED_SPACE      3       /* find a superpage-aligned range */
+#define        VMFS_SUPER_SPACE        3       /* find a superpage-aligned range */
+#define        VMFS_ALIGNED_SPACE(x)   ((x) << 8) /* find a range with fixed alignment */
 
 /*
  * vm_map_wire and vm_map_unwire option flags

Modified: head/sys/vm/vm_mmap.c
==============================================================================
--- head/sys/vm/vm_mmap.c       Fri Aug 16 21:04:58 2013        (r254429)
+++ head/sys/vm/vm_mmap.c       Fri Aug 16 21:13:55 2013        (r254430)
@@ -201,7 +201,7 @@ sys_mmap(td, uap)
        vm_prot_t cap_maxprot, prot, maxprot;
        void *handle;
        objtype_t handle_type;
-       int flags, error;
+       int align, error, flags;
        off_t pos;
        struct vmspace *vms = td->td_proc->p_vmspace;
        cap_rights_t rights;
@@ -251,6 +251,13 @@ sys_mmap(td, uap)
        size += pageoff;                        /* low end... */
        size = (vm_size_t) round_page(size);    /* hi end */
 
+       /* Ensure alignment is at least a page and fits in a pointer. */
+       align = flags & MAP_ALIGNMENT_MASK;
+       if (align != 0 && align != MAP_ALIGNED_SUPER &&
+           (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
+           align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
+               return (EINVAL);
+
        /*
         * Check for illegal addresses.  Watch out for address wrap... Note
         * that VM_*_ADDRESS are not constants due to casts (argh).
@@ -1490,7 +1497,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
        boolean_t fitit;
        vm_object_t object = NULL;
        struct thread *td = curthread;
-       int docow, error, rv;
+       int docow, error, findspace, rv;
        boolean_t writecounted;
 
        if (size == 0)
@@ -1605,12 +1612,17 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
        if (flags & MAP_STACK)
                rv = vm_map_stack(map, *addr, size, prot, maxprot,
                    docow | MAP_STACK_GROWS_DOWN);
-       else if (fitit)
-               rv = vm_map_find(map, object, foff, addr, size,
-                   object != NULL && object->type == OBJT_DEVICE ?
-                   VMFS_ALIGNED_SPACE : VMFS_OPTIMAL_SPACE, prot, maxprot,
-                   docow);
-       else
+       else if (fitit) {
+               if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
+                       findspace = VMFS_SUPER_SPACE;
+               else if ((flags & MAP_ALIGNMENT_MASK) != 0)
+                       findspace = VMFS_ALIGNED_SPACE(flags >>
+                           MAP_ALIGNMENT_SHIFT);
+               else
+                       findspace = VMFS_OPTIMAL_SPACE;
+               rv = vm_map_find(map, object, foff, addr, size, findspace,
+                   prot, maxprot, docow);
+       } else
                rv = vm_map_fixed(map, object, foff, *addr, size,
                                 prot, maxprot, docow);
 

Modified: head/usr.bin/kdump/mksubr
==============================================================================
--- head/usr.bin/kdump/mksubr   Fri Aug 16 21:04:58 2013        (r254429)
+++ head/usr.bin/kdump/mksubr   Fri Aug 16 21:13:55 2013        (r254430)
@@ -385,7 +385,6 @@ auto_switch_type "lio_listioname"      "
 auto_switch_type "madvisebehavname"    "_?MADV_[A-Z]+[[:space:]]+[0-9]+"              "sys/mman.h"
 auto_switch_type "minheritname"        "INHERIT_[A-Z]+[[:space:]]+[0-9]+"             "sys/mman.h"
 auto_or_type     "mlockallname"        "MCL_[A-Z]+[[:space:]]+0x[0-9]+"               "sys/mman.h"
-auto_or_type     "mmapflagsname"       "MAP_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+"         "sys/mman.h"
 auto_or_type     "mmapprotname"        "PROT_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+"        "sys/mman.h"
 auto_or_type     "modename"            "S_[A-Z]+[[:space:]]+[0-6]{7}"                 "sys/stat.h"
 auto_or_type     "mountflagsname"      "MNT_[A-Z]+[[:space:]]+0x[0-9]+"               "sys/mount.h"
@@ -469,6 +468,40 @@ cat <<_EOF_
 /*
  * AUTO - Special
  *
+ * The MAP_ALIGNED flag requires special handling.
+ */
+void
+mmapflagsname(int flags)
+{
+       int align;
+       int or = 0;
+       printf("%#x<", flags);
+_EOF_
+egrep "^#[[:space:]]*define[[:space:]]+MAP_[A-Z_]+[[:space:]]+0x[0-9A-Fa-f]+[[:space:]]*" \
+       $include_dir/sys/mman.h | grep -v MAP_ALIGNED | \
+       awk '{ for (i = 1; i <= NF; i++) \
+               if ($i ~ /define/) \
+                       break; \
+               ++i; \
+               printf "\tif (!((flags > 0) ^ ((%s) > 0)))\n\t\tif_print_or(flags, %s, or);\n", $i, $i }'
+cat <<_EOF_
+       align = flags & MAP_ALIGNMENT_MASK;
+       if (align != 0) {
+               if (align == MAP_ALIGNED_SUPER)
+                       print_or("MAP_ALIGNED_SUPER", or);
+               else {
+                       print_or("MAP_ALIGNED", or);
+                       printf("(%d)", align >> MAP_ALIGNMENT_SHIFT);
+               }
+       }
+       printf(">");
+       if (or == 0)
+               printf("<invalid>%d", flags);
+}
+
+/*
+ * AUTO - Special
+ *
  * The only reason this is not fully automated is due to the
  * grep -v RTP_PRIO statement. A better egrep line should
  * make this capable of being a auto_switch_type() function.

Modified: head/usr.bin/truss/syscalls.c
==============================================================================
--- head/usr.bin/truss/syscalls.c       Fri Aug 16 21:04:58 2013        (r254429)
+++ head/usr.bin/truss/syscalls.c       Fri Aug 16 21:13:55 2013        (r254430)
@@ -296,7 +296,7 @@ static struct xlat mmap_flags[] = {
        X(MAP_SHARED) X(MAP_PRIVATE) X(MAP_FIXED) X(MAP_RENAME)
        X(MAP_NORESERVE) X(MAP_RESERVED0080) X(MAP_RESERVED0100)
        X(MAP_HASSEMAPHORE) X(MAP_STACK) X(MAP_NOSYNC) X(MAP_ANON)
-       X(MAP_NOCORE) XEND
+       X(MAP_NOCORE) X(MAP_PREFAULT_READ) XEND
 };
 
 static struct xlat mprot_flags[] = {
@@ -893,9 +893,41 @@ print_arg(struct syscall_args *sc, unsig
        case Mprot:
                tmp = strdup(xlookup_bits(mprot_flags, args[sc->offset]));
                break;
-       case Mmapflags:
-               tmp = strdup(xlookup_bits(mmap_flags, args[sc->offset]));
+       case Mmapflags: {
+               const char *base, *alignstr;
+               int align, flags;
+
+               /*
+                * MAP_ALIGNED can't be handled by xlookup_bits(), so
+                * generate that string manually and prepend it to the
+                * string from xlookup_bits().  Have to be careful to
+                * avoid outputting MAP_ALIGNED|0 if MAP_ALIGNED is
+                * the only flag.
+                */
+               flags = args[sc->offset] & ~MAP_ALIGNMENT_MASK;
+               align = args[sc->offset] & MAP_ALIGNMENT_MASK;
+               if (align != 0) {
+                       if (align == MAP_ALIGNED_SUPER)
+                               alignstr = strdup("MAP_ALIGNED_SUPER");
+                       else
+                               asprintf(&alignstr, "MAP_ALIGNED(%d)",
+                                   align >> MAP_ALIGNMENT_SHIFT);
+                       if (flags == 0) {
+                               tmp = alignstr;
+                               break;
+                       }
+               } else
+                       alignstr = NULL;
+               base = strdup(xlookup_bits(mmap_flags, flags));
+               if (alignstr == NULL) {
+                       tmp = base;
+                       break;
+               }
+               asprintf(&tmp, "%s|%s", alignstr, base);
+               free(alignstr);
+               free(base);
                break;
+       }
        case Whence:
                tmp = strdup(xlookup(whence_arg, args[sc->offset]));
                break;
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to