The diff below introduces a new flag for mmap(2) that creates mappings that cannot fault. Normally, if you mmap a file, and your mapping is larger than the mapped file, memory accesses to full pages beyond the end of the file will fault. Depending on the OS, you will get a SIGSEGV or SIGBUS, and if you don't catch those, you die. This is especially nasty if you use file descriptor passing to share the file descriptor with some other process and this other process ftruncates the file without telling you.
The new xserver that matthieu@ just imported has the new xshm extension which mmaps file descriptors passed by clients through file descriptor passing. To protect itself from being trivially DoSed by a malicious (or careless) client, it keeps a list of mappings and installs a SIGBUS signal handler that checks whether the fault address matches any of these mappings. In that case it mmaps a private anonymous page on top of the faulting address and returns. Since OpenBSD generates SIGSEGV instead of SIGBUS in this case, this doesn't work for us, so I made sure matthieu@ disabled this functionality for now. But the new xshm extension would actually be a nice thing to have, as it circumvents certain problems with the traditional xshm extension that we have because of privsep. And file descriptor passing is also being used for DRI3, which we may want to support one day. Oh, and Wayland, which some people claim will replace X any day now, heavily uses mapping file descriptors passed over sockets as well. We could of course change the xserver code to also trap SIGSEGV. But this workaround is rather ugly. So my idea is to make X use this new flag and disable the stupid busfault code. The diff is remarkably simple. We already have the infrastructure in place to replace mapped pages with anons to support MAP_PRIVATE and copy-on-write. This diff simply leverages that infrastructure to replace a page that can't be read from the underlying object by an anonymous page. Some open issues: * I need to check whether all combinations of flags actually make sense. Should we only support __MAP_NOFAULT with non-anonymous mappings? * Should we only fix up the fault for access beyond the end of the mapped object (VM_PAGER_BAD) and still fault for actual I/O errors (VM_PAGER_ERROR)? * Should the flag be exported without the leading underscores since we actually want to encourage its use? Thoughts? 
Index: sys/mman.h =================================================================== RCS file: /cvs/src/sys/sys/mman.h,v retrieving revision 1.26 diff -u -p -r1.26 mman.h --- sys/mman.h 10 Jul 2014 19:00:23 -0000 1.26 +++ sys/mman.h 30 Sep 2014 20:34:42 -0000 @@ -58,8 +58,9 @@ #define __MAP_NOREPLACE 0x0800 /* fail if address not available */ #define MAP_ANON 0x1000 /* allocated from memory, swap space */ #define MAP_ANONYMOUS MAP_ANON /* alternate POSIX spelling */ +#define __MAP_NOFAULT 0x2000 -#define MAP_FLAGMASK 0x1ff7 +#define MAP_FLAGMASK 0x3ff7 #ifdef _KERNEL /* Index: uvm/uvm.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm.h,v retrieving revision 1.56 diff -u -p -r1.56 uvm.h --- uvm/uvm.h 11 Jul 2014 16:35:40 -0000 1.56 +++ uvm/uvm.h 30 Sep 2014 21:03:43 -0000 @@ -90,7 +90,8 @@ struct uvm { #define UVM_ET_SUBMAP 0x02 /* it is a vm_map submap */ #define UVM_ET_COPYONWRITE 0x04 /* copy_on_write */ #define UVM_ET_NEEDSCOPY 0x08 /* needs_copy */ -#define UVM_ET_HOLE 0x10 /* no backend */ +#define UVM_ET_HOLE 0x10 /* no backend */ +#define UVM_ET_NOFAULT 0x20 /* don't fault */ #define UVM_ET_FREEMAPPED 0x80 /* map entry is on free list (DEBUG) */ #define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0) @@ -98,6 +99,7 @@ struct uvm { #define UVM_ET_ISCOPYONWRITE(E) (((E)->etype & UVM_ET_COPYONWRITE) != 0) #define UVM_ET_ISNEEDSCOPY(E) (((E)->etype & UVM_ET_NEEDSCOPY) != 0) #define UVM_ET_ISHOLE(E) (((E)->etype & UVM_ET_HOLE) != 0) +#define UVM_ET_ISNOFAULT(E) (((E)->etype & UVM_ET_NOFAULT) != 0) #ifdef _KERNEL Index: uvm/uvm_extern.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm_extern.h,v retrieving revision 1.119 diff -u -p -r1.119 uvm_extern.h --- uvm/uvm_extern.h 11 Jul 2014 16:35:40 -0000 1.119 +++ uvm/uvm_extern.h 30 Sep 2014 20:08:36 -0000 @@ -148,14 +148,15 @@ typedef int vm_prot_t; #define UVM_ADV_MASK 0x7 /* mask */ /* mapping flags */ -#define 
UVM_FLAG_FIXED 0x010000 /* find space */ -#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */ -#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */ -#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */ -#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */ -#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */ -#define UVM_FLAG_HOLE 0x400000 /* no backend */ -#define UVM_FLAG_QUERY 0x800000 /* do everything, except actual execution */ +#define UVM_FLAG_FIXED 0x0010000 /* find space */ +#define UVM_FLAG_OVERLAY 0x0020000 /* establish overlay */ +#define UVM_FLAG_NOMERGE 0x0040000 /* don't merge map entries */ +#define UVM_FLAG_COPYONW 0x0080000 /* set copy_on_write flag */ +#define UVM_FLAG_AMAPPAD 0x0100000 /* for bss: pad amap to reduce malloc() */ +#define UVM_FLAG_TRYLOCK 0x0200000 /* fail if we can not lock map */ +#define UVM_FLAG_HOLE 0x0400000 /* no backend */ +#define UVM_FLAG_QUERY 0x0800000 /* do everything, except actual execution */ +#define UVM_FLAG_NOFAULT 0x1000000 /* don't fault */ /* macros to extract info */ #define UVM_PROTECTION(X) ((X) & UVM_PROT_MASK) Index: uvm/uvm_fault.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_fault.c,v retrieving revision 1.77 diff -u -p -r1.77 uvm_fault.c --- uvm/uvm_fault.c 7 Sep 2014 08:17:44 -0000 1.77 +++ uvm/uvm_fault.c 30 Sep 2014 19:44:28 -0000 @@ -1114,7 +1114,11 @@ Case2: goto ReFault; } - return (EACCES); /* XXX i/o error */ + if (!UVM_ET_ISNOFAULT(ufi.entry)) + return (EACCES); /* XXX i/o error */ + + uobjpage = PGO_DONTCARE; + promote = TRUE; } /* re-verify the state of the world. */ @@ -1132,7 +1136,7 @@ Case2: } /* didn't get the lock? release the page and retry. 
*/ - if (locked == FALSE) { + if (locked == FALSE && uobjpage != PGO_DONTCARE) { uvm_lock_pageq(); /* make sure it is in queues */ uvm_pageactivate(uobjpage); Index: uvm/uvm_map.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_map.c,v retrieving revision 1.175 diff -u -p -r1.175 uvm_map.c --- uvm/uvm_map.c 14 Aug 2014 17:21:38 -0000 1.175 +++ uvm/uvm_map.c 30 Sep 2014 20:07:53 -0000 @@ -1142,6 +1142,8 @@ uvm_map(struct vm_map *map, vaddr_t *add entry->etype |= UVM_ET_OBJ; else if (flags & UVM_FLAG_HOLE) entry->etype |= UVM_ET_HOLE; + if (flags & UVM_FLAG_NOFAULT) + entry->etype |= UVM_ET_NOFAULT; if (flags & UVM_FLAG_COPYONW) { entry->etype |= UVM_ET_COPYONWRITE; if ((flags & UVM_FLAG_OVERLAY) == 0) Index: uvm/uvm_mmap.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v retrieving revision 1.98 diff -u -p -r1.98 uvm_mmap.c --- uvm/uvm_mmap.c 12 Jul 2014 18:44:01 -0000 1.98 +++ uvm/uvm_mmap.c 30 Sep 2014 19:58:55 -0000 @@ -1004,6 +1004,8 @@ uvm_mmap(vm_map_t map, vaddr_t *addr, vs if ((flags & MAP_SHARED) == 0) uvmflag |= UVM_FLAG_COPYONW; + if (flags & __MAP_NOFAULT) + uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY); } /* set up mapping flags */