On 29 Jul 2014, at 12:41, Gleb Smirnoff <gleb...@freebsd.org> wrote:

>  Hi!
> 
>  Sorry for top quoting, this is to annoy you :) I got zero
> replies to the email below in a week. I'd really appreciate
> testing on different platforms. Any takers?
OK, it works on a Raspberry Pi running r269231 with your patch.
The only suspicious thing I observed was that the number of
'requests for I/O initiated by sendfile' in netstat -m doesn't
always increase, which I would have expected it to. However, I'm
not sure whether this is ARM-related (I would not think so) or
related to your patch at all.

Let me know if you need more information.
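In case it helps other testers, a minimal sendfile(2) exerciser along the
lines Gleb describes below might look like the following sketch (host, port
and file arguments are made up; any netcat-style listener will do as the
sink). Running it repeatedly and comparing netstat -m before and after
should be enough to spot a leak.

/*
 * Sketch: push a file to a TCP sink via sendfile(2).
 * Usage: ./sftest <ipv4-addr> <port> <file>
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(int argc, char **argv)
{
	struct sockaddr_in sin;
	off_t sbytes;
	int fd, s;

	if (argc != 4)
		errx(1, "usage: %s addr port file", argv[0]);
	if ((fd = open(argv[3], O_RDONLY)) == -1)
		err(1, "open");
	if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
		err(1, "socket");

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(sin);
	sin.sin_port = htons(atoi(argv[2]));
	if (inet_pton(AF_INET, argv[1], &sin.sin_addr) != 1)
		errx(1, "bad address: %s", argv[1]);
	if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "connect");

	/* nbytes == 0 means "send until EOF"; sbytes reports what was sent. */
	if (sendfile(fd, s, 0, 0, NULL, &sbytes, 0) == -1)
		err(1, "sendfile");
	printf("sent %jd bytes\n", (intmax_t)sbytes);
	return (0);
}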

Best regards
Michael
> 
> On Sat, Jul 19, 2014 at 10:27:25AM +0400, Gleb Smirnoff wrote:
> T>   Hi!
> T> 
> T>   we've got a lot of common code in sys/*/*/vm_machdep.c wrt the
> T> sf_buf allocation. I have gathered it into kern/subr_sfbuf.c.
> T> 
> T> o No MD code left in sys/*/*/vm_machdep.c.
> T> o The arches that have a physical map have their implementation in
> T>   machine/sf_buf.h
> T> o The arches that need sf_bufs use subr_sfbuf.c, optionally having
> T>   some stuff in machine/sf_buf.h
> T> 
> T> I can test only i386. I'd be grateful for testing:
> T> 
> T> arm
> T> mips
> T> mips64
> T> sparc64
> T> powerpc
> T> i386 XEN
> T> 
> T> The test is a simple use of any application or test that uses sendfile(2).
> T> The box shouldn't crash :) of course, and after the end of a test there
> T> should be no evidence of an sf_buf leak (observed via netstat -m).
> T> 
> T> -- 
> T> Totus tuus, Glebius.
> 
> T> Index: sys/amd64/include/sf_buf.h
> T> ===================================================================
> T> --- sys/amd64/include/sf_buf.h     (revision 268750)
> T> +++ sys/amd64/include/sf_buf.h     (working copy)
> T> @@ -29,10 +29,6 @@
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -
> T>  /*
> T>   * On this machine, the only purpose for which sf_buf is used is to implement
> T>   * an opaque pointer required by the machine-independent parts of the kernel.
> T> @@ -39,21 +35,7 @@
> T>   * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T>   * actual mapping is provided by the direct virtual-to-physical mapping.  
> T>   */
> T> -struct sf_buf;
> T> -
> T> -static inline struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int pri)
> T> -{
> T> -
> T> -  return ((struct sf_buf *)m);
> T> -}
> T> -
> T> -static inline void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -}
> T> -
> T> -static __inline vm_offset_t
> T> +static inline vm_offset_t
> T>  sf_buf_kva(struct sf_buf *sf)
> T>  {
> T>  
> T> @@ -60,11 +42,10 @@ sf_buf_kva(struct sf_buf *sf)
> T>    return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
> T>  }
> T>  
> T> -static __inline vm_page_t
> T> +static inline vm_page_t
> T>  sf_buf_page(struct sf_buf *sf)
> T>  {
> T>  
> T>    return ((vm_page_t)sf);
> T>  }
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/arm/arm/vm_machdep.c
> T> ===================================================================
> T> --- sys/arm/arm/vm_machdep.c       (revision 268750)
> T> +++ sys/arm/arm/vm_machdep.c       (working copy)
> T> @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$");
> T>  #include <sys/mbuf.h>
> T>  #include <sys/proc.h>
> T>  #include <sys/socketvar.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/syscall.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/sysent.h>
> T> @@ -83,43 +82,7 @@ __FBSDID("$FreeBSD$");
> T>  CTASSERT(sizeof(struct switchframe) == 24);
> T>  CTASSERT(sizeof(struct trapframe) == 80);
> T>  
> T> -#ifndef NSFBUFS
> T> -#define NSFBUFS           (512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void     sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -LIST_HEAD(sf_head, sf_buf);
> T> -
> T>  /*
> T> - * A hash table of active sendfile(2) buffers
> T> - */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int    sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T> -/*
> T>   * Finish a fork operation, with process p2 nearly set up.
> T>   * Copy and update the pcb, set up the stack so that the child
> T>   * ready to run and return to user mode.
> T> @@ -184,107 +147,7 @@ cpu_thread_swapout(struct thread *td)
> T>  {
> T>  }
> T>  
> T> -/*
> T> - * Detatch mapped page and release resources back to the system.
> T> - */
> T>  void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -
> T> -   mtx_lock(&sf_buf_lock);
> T> -   sf->ref_count--;
> T> -   if (sf->ref_count == 0) {
> T> -           TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -           nsfbufsused--;
> T> -           pmap_kremove(sf->kva);
> T> -           sf->m = NULL;
> T> -           LIST_REMOVE(sf, list_entry);
> T> -           if (sf_buf_alloc_want > 0)
> T> -                   wakeup(&sf_buf_freelist);
> T> -   }
> T> -   mtx_unlock(&sf_buf_lock);
> T> -}
> T> -
> T> -/*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -  struct sf_buf *sf_bufs;
> T> -  vm_offset_t sf_base;
> T> -  int i;
> T> -
> T> -  nsfbufs = NSFBUFS;
> T> -  TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -          
> T> -  sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -  TAILQ_INIT(&sf_buf_freelist);
> T> -  sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -  sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -      M_NOWAIT | M_ZERO);
> T> -  for (i = 0; i < nsfbufs; i++) {
> T> -          sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -          TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -  }
> T> -  sf_buf_alloc_want = 0;
> T> -  mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist. Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -  struct sf_head *hash_list;
> T> -  struct sf_buf *sf;
> T> -  int error;
> T> -
> T> -  hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -  mtx_lock(&sf_buf_lock);
> T> -  LIST_FOREACH(sf, hash_list, list_entry) {
> T> -          if (sf->m == m) {
> T> -                  sf->ref_count++;
> T> -                  if (sf->ref_count == 1) {
> T> -                          TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -                          nsfbufsused++;
> T> -                          nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -                  }
> T> -                  goto done;
> T> -          }
> T> -  }
> T> -  while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -          if (flags & SFB_NOWAIT)
> T> -                  goto done;
> T> -          sf_buf_alloc_want++;
> T> -          SFSTAT_INC(sf_allocwait);
> T> -          error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -              (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -          sf_buf_alloc_want--;
> T> -  
> T> -
> T> -          /*
> T> -           * If we got a signal, don't risk going back to sleep.
> T> -           */
> T> -          if (error)
> T> -                  goto done;
> T> -  }
> T> -  TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -  if (sf->m != NULL)
> T> -          LIST_REMOVE(sf, list_entry);
> T> -  LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -  sf->ref_count = 1;
> T> -  sf->m = m;
> T> -  nsfbufsused++;
> T> -  nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -  pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
> T> -done:
> T> -  mtx_unlock(&sf_buf_lock);
> T> -  return (sf);
> T> -}
> T> -
> T> -void
> T>  cpu_set_syscall_retval(struct thread *td, int error)
> T>  {
> T>    struct trapframe *frame;
> T> Index: sys/arm/include/sf_buf.h
> T> ===================================================================
> T> --- sys/arm/include/sf_buf.h       (revision 268750)
> T> +++ sys/arm/include/sf_buf.h       (working copy)
> T> @@ -29,33 +29,18 @@
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -  LIST_ENTRY(sf_buf) list_entry;  /* list of buffers */
> T> -  TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
> T> -  struct          vm_page *m;     /* currently mapped page */
> T> -  vm_offset_t     kva;            /* va of mapping */
> T> -  int             ref_count;      /* usage of this mapping */
> T> -};
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> +static inline void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T>  {
> T>  
> T> -  return (sf->kva);
> T> +  pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
> T>  }
> T>  
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> +static inline int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T>  {
> T>  
> T> -  return (sf->m);
> T> +  pmap_kremove(sf->kva);
> T> +  return (1);
> T>  }
> T> -
> T> -struct sf_buf *   sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/arm/include/vmparam.h
> T> ===================================================================
> T> --- sys/arm/include/vmparam.h      (revision 268750)
> T> +++ sys/arm/include/vmparam.h      (working copy)
> T> @@ -170,4 +170,7 @@ extern vm_offset_t vm_max_kernel_address;
> T>  #define   VM_MAX_AUTOTUNE_MAXUSERS        384
> T>  #endif
> T>  
> T> +#define   SFBUF
> T> +#define   SFBUF_MAP
> T> +
> T>  #endif    /* _MACHINE_VMPARAM_H_ */
> T> Index: sys/conf/files.arm
> T> ===================================================================
> T> --- sys/conf/files.arm     (revision 268750)
> T> +++ sys/conf/files.arm     (working copy)
> T> @@ -77,6 +77,7 @@ font.h                           optional        sc                      \
> T>    clean   "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
> T>  kern/subr_busdma_bufalloc.c       standard
> T>  kern/subr_dummy_vdso_tc.c standard
> T> +kern/subr_sfbuf.c         standard
> T>  libkern/arm/aeabi_unwind.c        standard
> T>  libkern/arm/divsi3.S              standard
> T>  libkern/arm/ffs.S         standard
> T> Index: sys/conf/files.i386
> T> ===================================================================
> T> --- sys/conf/files.i386    (revision 268750)
> T> +++ sys/conf/files.i386    (working copy)
> T> @@ -520,6 +520,7 @@ isa/vga_isa.c                  optional vga
> T>  kern/kern_clocksource.c           standard
> T>  kern/imgact_aout.c                optional compat_aout
> T>  kern/imgact_gzip.c                optional gzip
> T> +kern/subr_sfbuf.c         standard
> T>  libkern/divdi3.c          standard
> T>  libkern/flsll.c                   standard
> T>  libkern/memmove.c         standard
> T> Index: sys/conf/files.mips
> T> ===================================================================
> T> --- sys/conf/files.mips    (revision 268750)
> T> +++ sys/conf/files.mips    (working copy)
> T> @@ -51,6 +51,7 @@ mips/mips/vm_machdep.c                   standard
> T>  kern/kern_clocksource.c                   standard
> T>  kern/link_elf_obj.c                       standard
> T>  kern/subr_dummy_vdso_tc.c         standard
> T> +kern/subr_sfbuf.c                 optional        mips | mipsel | mipsn32
> T>  
> T>  # gcc/clang runtime
> T>  libkern/ffsl.c                            standard
> T> Index: sys/conf/files.pc98
> T> ===================================================================
> T> --- sys/conf/files.pc98    (revision 268750)
> T> +++ sys/conf/files.pc98    (working copy)
> T> @@ -205,6 +205,7 @@ i386/svr4/svr4_machdep.c       optional compat_svr4
> T>  kern/kern_clocksource.c           standard
> T>  kern/imgact_aout.c                optional compat_aout
> T>  kern/imgact_gzip.c                optional gzip
> T> +kern/subr_sfbuf.c         standard
> T>  libkern/divdi3.c          standard
> T>  libkern/flsll.c                   standard
> T>  libkern/memmove.c         standard
> T> Index: sys/conf/files.powerpc
> T> ===================================================================
> T> --- sys/conf/files.powerpc (revision 268750)
> T> +++ sys/conf/files.powerpc (working copy)
> T> @@ -71,6 +71,7 @@ dev/vt/hw/ofwfb/ofwfb.c          optional        vt aim
> T>  kern/kern_clocksource.c           standard
> T>  kern/subr_dummy_vdso_tc.c standard
> T>  kern/syscalls.c                   optional        ktr
> T> +kern/subr_sfbuf.c         standard
> T>  libkern/ashldi3.c         optional        powerpc
> T>  libkern/ashrdi3.c         optional        powerpc
> T>  libkern/bcmp.c                    standard
> T> Index: sys/conf/files.sparc64
> T> ===================================================================
> T> --- sys/conf/files.sparc64 (revision 268750)
> T> +++ sys/conf/files.sparc64 (working copy)
> T> @@ -63,6 +63,7 @@ dev/uart/uart_kbd_sun.c          optional        uart sc | vt
> T>  kern/kern_clocksource.c           standard
> T>  kern/subr_dummy_vdso_tc.c standard
> T>  kern/syscalls.c                   optional        ktr
> T> +kern/subr_sfbuf.c         standard
> T>  libkern/ffs.c                     standard
> T>  libkern/ffsl.c                    standard
> T>  libkern/fls.c                     standard
> T> Index: sys/i386/i386/vm_machdep.c
> T> ===================================================================
> T> --- sys/i386/i386/vm_machdep.c     (revision 268750)
> T> +++ sys/i386/i386/vm_machdep.c     (working copy)
> T> @@ -118,38 +118,6 @@ static u_int  cpu_reset_proxyid;
> T>  static volatile u_int     cpu_reset_proxy_active;
> T>  #endif
> T>  
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void       sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -LIST_HEAD(sf_head, sf_buf);
> T> -
> T> -/*
> T> - * A hash table of active sendfile(2) buffers
> T> - */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define   SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int      sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T>  extern int        _ucodesel, _udatasel;
> T>  
> T>  /*
> T> @@ -750,122 +718,13 @@ cpu_reset_real()
> T>  }
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -  struct sf_buf *sf_bufs;
> T> -  vm_offset_t sf_base;
> T> -  int i;
> T> -
> T> -  nsfbufs = NSFBUFS;
> T> -  TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -  sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -  TAILQ_INIT(&sf_buf_freelist);
> T> -  sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -  sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -      M_NOWAIT | M_ZERO);
> T> -  for (i = 0; i < nsfbufs; i++) {
> T> -          sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -          TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -  }
> T> -  sf_buf_alloc_want = 0;
> T> -  mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Invalidate the cache lines that may belong to the page, if
> T> - * (possibly old) mapping of the page by sf buffer exists.  Returns
> T> - * TRUE when mapping was found and cache invalidated.
> T> - */
> T> -boolean_t
> T> -sf_buf_invalidate_cache(vm_page_t m)
> T> -{
> T> -  struct sf_head *hash_list;
> T> -  struct sf_buf *sf;
> T> -  boolean_t ret;
> T> -
> T> -  hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -  ret = FALSE;
> T> -  mtx_lock(&sf_buf_lock);
> T> -  LIST_FOREACH(sf, hash_list, list_entry) {
> T> -          if (sf->m == m) {
> T> -                  /*
> T> -                   * Use pmap_qenter to update the pte for
> T> -                   * existing mapping, in particular, the PAT
> T> -                   * settings are recalculated.
> T> -                   */
> T> -                  pmap_qenter(sf->kva, &m, 1);
> T> -                  pmap_invalidate_cache_range(sf->kva, sf->kva +
> T> -                      PAGE_SIZE);
> T> -                  ret = TRUE;
> T> -                  break;
> T> -          }
> T> -  }
> T> -  mtx_unlock(&sf_buf_lock);
> T> -  return (ret);
> T> -}
> T> -
> T> -/*
> T>   * Get an sf_buf from the freelist.  May block if none are available.
> T>   */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> +void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T>  {
> T>    pt_entry_t opte, *ptep;
> T> -  struct sf_head *hash_list;
> T> -  struct sf_buf *sf;
> T> -#ifdef SMP
> T> -  cpuset_t other_cpus;
> T> -  u_int cpuid;
> T> -#endif
> T> -  int error;
> T>  
> T> -  KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
> T> -      ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
> T> -  hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -  mtx_lock(&sf_buf_lock);
> T> -  LIST_FOREACH(sf, hash_list, list_entry) {
> T> -          if (sf->m == m) {
> T> -                  sf->ref_count++;
> T> -                  if (sf->ref_count == 1) {
> T> -                          TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -                          nsfbufsused++;
> T> -                          nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -                  }
> T> -#ifdef SMP
> T> -                  goto shootdown; 
> T> -#else
> T> -                  goto done;
> T> -#endif
> T> -          }
> T> -  }
> T> -  while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -          if (flags & SFB_NOWAIT)
> T> -                  goto done;
> T> -          sf_buf_alloc_want++;
> T> -          SFSTAT_INC(sf_allocwait);
> T> -          error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -              (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -          sf_buf_alloc_want--;
> T> -
> T> -          /*
> T> -           * If we got a signal, don't risk going back to sleep. 
> T> -           */
> T> -          if (error)
> T> -                  goto done;
> T> -  }
> T> -  TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -  if (sf->m != NULL)
> T> -          LIST_REMOVE(sf, list_entry);
> T> -  LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -  sf->ref_count = 1;
> T> -  sf->m = m;
> T> -  nsfbufsused++;
> T> -  nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -
> T>    /*
> T>     * Update the sf_buf's virtual-to-physical mapping, flushing the
> T>     * virtual address from the TLB.  Since the reference count for 
> T> @@ -876,11 +735,11 @@ cpu_reset_real()
> T>    ptep = vtopte(sf->kva);
> T>    opte = *ptep;
> T>  #ifdef XEN
> T> -       PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
> T> -     | PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0));
> T> +       PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag
> T> +     | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0));
> T>  #else
> T> -  *ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V |
> T> -      pmap_cache_bits(m->md.pat_mode, 0);
> T> +  *ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
> T> +      pmap_cache_bits(sf->m->md.pat_mode, 0);
> T>  #endif
> T>  
> T>    /*
> T> @@ -892,7 +751,21 @@ cpu_reset_real()
> T>  #ifdef SMP
> T>    if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T>            CPU_ZERO(&sf->cpumask);
> T> -shootdown:
> T> +
> T> +  sf_buf_shootdown(sf, flags);
> T> +#else
> T> +  if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T> +          pmap_invalidate_page(kernel_pmap, sf->kva);
> T> +#endif
> T> +}
> T> +
> T> +#ifdef SMP
> T> +void
> T> +sf_buf_shootdown(struct sf_buf *sf, int flags)
> T> +{
> T> +  cpuset_t other_cpus;
> T> +  u_int cpuid;
> T> +
> T>    sched_pin();
> T>    cpuid = PCPU_GET(cpuid);
> T>    if (!CPU_ISSET(cpuid, &sf->cpumask)) {
> T> @@ -909,42 +782,50 @@ cpu_reset_real()
> T>            }
> T>    }
> T>    sched_unpin();
> T> +}
> T> +#endif
> T> +
> T> +/*
> T> + * MD part of sf_buf_free().
> T> + */
> T> +int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T> +{
> T> +#ifdef XEN
> T> +  /*
> T> +   * Xen doesn't like having dangling R/W mappings
> T> +   */
> T> +  pmap_qremove(sf->kva, 1);
> T> +  return (1);
> T>  #else
> T> -  if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
> T> -          pmap_invalidate_page(kernel_pmap, sf->kva);
> T> +  return (0);
> T>  #endif
> T> -done:
> T> -  mtx_unlock(&sf_buf_lock);
> T> -  return (sf);
> T>  }
> T>  
> T> +static void
> T> +sf_buf_invalidate(struct sf_buf *sf)
> T> +{
> T> +  vm_page_t m = sf->m;
> T> +
> T> +  /*
> T> +   * Use pmap_qenter to update the pte for
> T> +   * existing mapping, in particular, the PAT
> T> +   * settings are recalculated.
> T> +   */
> T> +  pmap_qenter(sf->kva, &m, 1);
> T> +  pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
> T> +}
> T> +
> T>  /*
> T> - * Remove a reference from the given sf_buf, adding it to the free
> T> - * list when its reference count reaches zero.  A freed sf_buf still,
> T> - * however, retains its virtual-to-physical mapping until it is
> T> - * recycled or reactivated by sf_buf_alloc(9).
> T> + * Invalidate the cache lines that may belong to the page, if
> T> + * (possibly old) mapping of the page by sf buffer exists.  Returns
> T> + * TRUE when mapping was found and cache invalidated.
> T>   */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> +boolean_t
> T> +sf_buf_invalidate_cache(vm_page_t m)
> T>  {
> T>  
> T> -  mtx_lock(&sf_buf_lock);
> T> -  sf->ref_count--;
> T> -  if (sf->ref_count == 0) {
> T> -          TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -          nsfbufsused--;
> T> -#ifdef XEN
> T> -/*
> T> - * Xen doesn't like having dangling R/W mappings
> T> - */
> T> -          pmap_qremove(sf->kva, 1);
> T> -          sf->m = NULL;
> T> -          LIST_REMOVE(sf, list_entry);
> T> -#endif
> T> -          if (sf_buf_alloc_want > 0)
> T> -                  wakeup(&sf_buf_freelist);
> T> -  }
> T> -  mtx_unlock(&sf_buf_lock);
> T> +  return (sf_buf_process_page(m, sf_buf_invalidate));
> T>  }
> T>  
> T>  /*
> T> Index: sys/i386/include/sf_buf.h
> T> ===================================================================
> T> --- sys/i386/include/sf_buf.h      (revision 268750)
> T> +++ sys/i386/include/sf_buf.h      (working copy)
> T> @@ -1,5 +1,5 @@
> T>  /*-
> T> - * Copyright (c) 2003, 2005 Alan L. Cox <a...@cs.rice.edu>
> T> + * Copyright (c) 2014 Gleb Smirnoff <gleb...@freebsd.org>
> T>   * All rights reserved.
> T>   *
> T>   * Redistribution and use in source and binary forms, with or without
> T> @@ -29,39 +29,8 @@
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#include <sys/_cpuset.h>
> T> -#include <sys/queue.h>
> T> +void sf_buf_map(struct sf_buf *, int);
> T> +int sf_buf_unmap(struct sf_buf *);
> T> +boolean_t sf_buf_invalidate_cache(vm_page_t);
> T>  
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -  LIST_ENTRY(sf_buf) list_entry;  /* list of buffers */
> T> -  TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
> T> -  struct          vm_page *m;     /* currently mapped page */
> T> -  vm_offset_t     kva;            /* va of mapping */
> T> -  int             ref_count;      /* usage of this mapping */
> T> -#ifdef SMP
> T> -  cpuset_t        cpumask;        /* cpus on which mapping is valid */
> T> -#endif
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -  return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -  return (sf->m);
> T> -}
> T> -
> T> -boolean_t sf_buf_invalidate_cache(vm_page_t m);
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/i386/include/vmparam.h
> T> ===================================================================
> T> --- sys/i386/include/vmparam.h     (revision 268750)
> T> +++ sys/i386/include/vmparam.h     (working copy)
> T> @@ -198,4 +198,9 @@
> T>  #define VM_MAX_AUTOTUNE_MAXUSERS 384
> T>  #endif
> T>  
> T> +#define   SFBUF
> T> +#define   SFBUF_MAP
> T> +#define   SFBUF_CPUSET
> T> +#define   SFBUF_PROCESS_PAGE
> T> +
> T>  #endif /* _MACHINE_VMPARAM_H_ */
> T> Index: sys/kern/subr_sfbuf.c
> T> ===================================================================
> T> --- sys/kern/subr_sfbuf.c  (revision 0)
> T> +++ sys/kern/subr_sfbuf.c  (working copy)
> T> @@ -0,0 +1,226 @@
> T> +/*-
> T> + * Copyright (c) 2014 Gleb Smirnoff <gleb...@freebsd.org>
> T> + * Copyright (c) 2003, 2005 Alan L. Cox <a...@cs.rice.edu>
> T> + * All rights reserved.
> T> + *
> T> + * Redistribution and use in source and binary forms, with or without
> T> + * modification, are permitted provided that the following conditions
> T> + * are met:
> T> + * 1. Redistributions of source code must retain the above copyright
> T> + *    notice, this list of conditions and the following disclaimer.
> T> + * 2. Redistributions in binary form must reproduce the above copyright
> T> + *    notice, this list of conditions and the following disclaimer in the
> T> + *    documentation and/or other materials provided with the distribution.
> T> + *
> T> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> + * SUCH DAMAGE.
> T> + */
> T> +
> T> +#include <sys/cdefs.h>
> T> +__FBSDID("$FreeBSD$");
> T> +
> T> +#include <sys/param.h>
> T> +#include <sys/kernel.h>
> T> +#include <sys/lock.h>
> T> +#include <sys/malloc.h>
> T> +#include <sys/mutex.h>
> T> +#include <sys/sf_buf.h>
> T> +#include <sys/smp.h>
> T> +#include <sys/sysctl.h>
> T> +
> T> +#include <vm/vm.h>
> T> +#include <vm/vm_extern.h>
> T> +#include <vm/vm_page.h>
> T> +
> T> +#ifndef NSFBUFS
> T> +#define   NSFBUFS         (512 + maxusers * 16)
> T> +#endif
> T> +
> T> +static int nsfbufs;
> T> +static int nsfbufspeak;
> T> +static int nsfbufsused;
> T> +
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> +    "Maximum number of sendfile(2) sf_bufs available");
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> +    "Number of sendfile(2) sf_bufs at peak usage");
> T> +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> +    "Number of sendfile(2) sf_bufs in use");
> T> +
> T> +static void       sf_buf_init(void *arg);
> T> +SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> +
> T> +LIST_HEAD(sf_head, sf_buf);
> T> +
> T> +/*
> T> + * A hash table of active sendfile(2) buffers
> T> + */
> T> +static struct sf_head *sf_buf_active;
> T> +static u_long sf_buf_hashmask;
> T> +
> T> +#define   SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
> T> +
> T> +static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> +static u_int      sf_buf_alloc_want;
> T> +
> T> +/*
> T> + * A lock used to synchronize access to the hash table and free list
> T> + */
> T> +static struct mtx sf_buf_lock;
> T> +
> T> +/*
> T> + * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> + */
> T> +static void
> T> +sf_buf_init(void *arg)
> T> +{
> T> +  struct sf_buf *sf_bufs;
> T> +  vm_offset_t sf_base;
> T> +  int i;
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +  if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +          return;
> T> +#endif
> T> +
> T> +  nsfbufs = NSFBUFS;
> T> +  TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> +
> T> +  sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> +  TAILQ_INIT(&sf_buf_freelist);
> T> +  sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> +  sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> +      M_NOWAIT | M_ZERO);
> T> +  KASSERT(sf_bufs, ("%s: malloc failure", __func__));
> T> +  for (i = 0; i < nsfbufs; i++) {
> T> +          sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> +          TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> +  }
> T> +  sf_buf_alloc_want = 0;
> T> +  mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> +}
> T> +
> T> +/*
> T> + * Get an sf_buf from the freelist.  May block if none are available.
> T> + */
> T> +struct sf_buf *
> T> +sf_buf_alloc(struct vm_page *m, int flags)
> T> +{
> T> +  struct sf_head *hash_list;
> T> +  struct sf_buf *sf;
> T> +  int error;
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +  if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +          return ((struct sf_buf *)m);
> T> +#endif
> T> +
> T> +  KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
> T> +      ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
> T> +  hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> +  mtx_lock(&sf_buf_lock);
> T> +  LIST_FOREACH(sf, hash_list, list_entry) {
> T> +          if (sf->m == m) {
> T> +                  sf->ref_count++;
> T> +                  if (sf->ref_count == 1) {
> T> +                          TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> +                          nsfbufsused++;
> T> +                          nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> +                  }
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +                  sf_buf_shootdown(sf, flags);
> T> +#endif
> T> +                  goto done;
> T> +          }
> T> +  }
> T> +  while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> +          if (flags & SFB_NOWAIT)
> T> +                  goto done;
> T> +          sf_buf_alloc_want++;
> T> +          SFSTAT_INC(sf_allocwait);
> T> +          error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> +              (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> +          sf_buf_alloc_want--;
> T> +
> T> +          /*
> T> +           * If we got a signal, don't risk going back to sleep. 
> T> +           */
> T> +          if (error)
> T> +                  goto done;
> T> +  }
> T> +  TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> +  if (sf->m != NULL)
> T> +          LIST_REMOVE(sf, list_entry);
> T> +  LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> +  sf->ref_count = 1;
> T> +  sf->m = m;
> T> +  nsfbufsused++;
> T> +  nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> +  sf_buf_map(sf, flags);
> T> +done:
> T> +  mtx_unlock(&sf_buf_lock);
> T> +  return (sf);
> T> +}
> T> +
> T> +/*
> T> + * Remove a reference from the given sf_buf, adding it to the free
> T> + * list when its reference count reaches zero.  A freed sf_buf still,
> T> + * however, retains its virtual-to-physical mapping until it is
> T> + * recycled or reactivated by sf_buf_alloc(9).
> T> + */
> T> +void
> T> +sf_buf_free(struct sf_buf *sf)
> T> +{
> T> +
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +  if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +          return;
> T> +#endif
> T> +
> T> +  mtx_lock(&sf_buf_lock);
> T> +  sf->ref_count--;
> T> +  if (sf->ref_count == 0) {
> T> +          TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> +          nsfbufsused--;
> T> +          if (sf_buf_unmap(sf)) {
> T> +                  sf->m = NULL;
> T> +                  LIST_REMOVE(sf, list_entry);
> T> +          }
> T> +          if (sf_buf_alloc_want > 0)
> T> +                  wakeup(&sf_buf_freelist);
> T> +  }
> T> +  mtx_unlock(&sf_buf_lock);
> T> +}
> T> +
> T> +#ifdef SFBUF_PROCESS_PAGE
> T> +/*
> T> + * Run callback function on sf_buf that holds a certain page.
> T> + */
> T> +boolean_t
> T> +sf_buf_process_page(vm_page_t m, void (*cb)(struct sf_buf *))
> T> +{
> T> +  struct sf_head *hash_list;
> T> +  struct sf_buf *sf;
> T> +
> T> +  hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> +  mtx_lock(&sf_buf_lock);
> T> +  LIST_FOREACH(sf, hash_list, list_entry) {
> T> +          if (sf->m == m) {
> T> +                  cb(sf);
> T> +                  mtx_unlock(&sf_buf_lock);
> T> +                  return (TRUE);
> T> +          }
> T> +  }
> T> +  mtx_unlock(&sf_buf_lock);
> T> +  return (FALSE);
> T> +}
> T> +#endif    /* SFBUF_PROCESS_PAGE */
> T> 
> T> Property changes on: sys/kern/subr_sfbuf.c
> T> ___________________________________________________________________
> T> Added: svn:mime-type
> T> ## -0,0 +1 ##
> T> +text/plain
> T> \ No newline at end of property
> T> Added: svn:keywords
> T> ## -0,0 +1 ##
> T> +FreeBSD=%H
> T> \ No newline at end of property
> T> Added: svn:eol-style
> T> ## -0,0 +1 ##
> T> +native
> T> \ No newline at end of property
> T> Index: sys/mips/include/sf_buf.h
> T> ===================================================================
> T> --- sys/mips/include/sf_buf.h      (revision 268750)
> T> +++ sys/mips/include/sf_buf.h      (working copy)
> T> @@ -29,31 +29,9 @@
> T>  #ifndef _MACHINE_SF_BUF_H_
> T>  #define _MACHINE_SF_BUF_H_
> T>  
> T> -#ifdef __mips_n64
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -#else
> T> -#include <sys/queue.h>
> T> -#endif
> T> +#ifdef __mips_n64 /* In 64 bit the whole memory is directly mapped */
> T>  
> T> -#ifdef __mips_n64
> T> -/* In 64 bit the whole memory is directly mapped */
> T> -struct    sf_buf;
> T> -
> T> -static inline struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int pri)
> T> -{
> T> -
> T> -  return ((struct sf_buf *)m);
> T> -}
> T> -
> T> -static inline void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -}
> T> -
> T> -static __inline vm_offset_t
> T> +static inline vm_offset_t
> T>  sf_buf_kva(struct sf_buf *sf)
> T>  {
> T>    vm_page_t       m;
> T> @@ -62,7 +40,7 @@ sf_buf_kva(struct sf_buf *sf)
> T>    return (MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)));
> T>  }
> T>  
> T> -static __inline struct vm_page *
> T> +static inline struct vm_page *
> T>  sf_buf_page(struct sf_buf *sf)
> T>  {
> T>  
> T> @@ -69,31 +47,5 @@ sf_buf_page(struct sf_buf *sf)
> T>    return ((vm_page_t)sf);
> T>  }
> T>  
> T> -#else /* ! __mips_n64 */
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -  SLIST_ENTRY(sf_buf) free_list;  /* list of free buffer slots */
> T> -  struct          vm_page *m;     /* currently mapped page */
> T> -  vm_offset_t     kva;            /* va of mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -  return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -  return (sf->m);
> T> -}
> T>  #endif /* __mips_n64 */
> T> -
> T>  #endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/mips/include/vmparam.h
> T> ===================================================================
> T> --- sys/mips/include/vmparam.h     (revision 268750)
> T> +++ sys/mips/include/vmparam.h     (working copy)
> T> @@ -187,4 +187,8 @@
> T>  
> T>  #define   ZERO_REGION_SIZE        (64 * 1024)     /* 64KB */
> T>  
> T> +#ifndef __mips_n64
> T> +#define   SFBUF
> T> +#endif
> T> +
> T>  #endif /* !_MACHINE_VMPARAM_H_ */
> T> Index: sys/mips/mips/vm_machdep.c
> T> ===================================================================
> T> --- sys/mips/mips/vm_machdep.c     (revision 268750)
> T> +++ sys/mips/mips/vm_machdep.c     (working copy)
> T> @@ -76,9 +76,6 @@ __FBSDID("$FreeBSD$");
> T>  
> T>  #include <sys/user.h>
> T>  #include <sys/mbuf.h>
> T> -#ifndef __mips_n64
> T> -#include <sys/sf_buf.h>
> T> -#endif
> T>  
> T>  /* Duplicated from asm.h */
> T>  #if defined(__mips_o32)
> T> @@ -92,39 +89,7 @@ __FBSDID("$FreeBSD$");
> T>  #define   CALLFRAME_SIZ   (SZREG * 4)
> T>  #endif
> T>  
> T> -#ifndef __mips_n64
> T> -
> T> -#ifndef NSFBUFS
> T> -#define   NSFBUFS         (512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void       sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T>  /*
> T> - * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
> T> - * sf_freelist head with the sf_lock mutex.
> T> - */
> T> -static struct {
> T> -  SLIST_HEAD(, sf_buf) sf_head;
> T> -  struct mtx sf_lock;
> T> -} sf_freelist;
> T> -
> T> -static u_int      sf_buf_alloc_want;
> T> -#endif /* !__mips_n64 */
> T> -
> T> -/*
> T>   * Finish a fork operation, with process p2 nearly set up.
> T>   * Copy and update the pcb, set up the stack so that the child
> T>   * ready to run and return to user mode.
> T> @@ -513,84 +478,6 @@ cpu_set_upcall_kse(struct thread *td, void (*entry
> T>  #define   ZIDLE_HI(v)     ((v) * 4 / 5)
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -#ifndef __mips_n64
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -  struct sf_buf *sf_bufs;
> T> -  vm_offset_t sf_base;
> T> -  int i;
> T> -
> T> -  nsfbufs = NSFBUFS;
> T> -  TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -  mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
> T> -  SLIST_INIT(&sf_freelist.sf_head);
> T> -  sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -  sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -      M_NOWAIT | M_ZERO);
> T> -  for (i = 0; i < nsfbufs; i++) {
> T> -          sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -          SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
> T> -  }
> T> -  sf_buf_alloc_want = 0;
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist.  Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -  struct sf_buf *sf;
> T> -  int error;
> T> -
> T> -  mtx_lock(&sf_freelist.sf_lock);
> T> -  while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
> T> -          if (flags & SFB_NOWAIT)
> T> -                  break;
> T> -          sf_buf_alloc_want++;
> T> -          SFSTAT_INC(sf_allocwait);
> T> -          error = msleep(&sf_freelist, &sf_freelist.sf_lock,
> T> -              (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -          sf_buf_alloc_want--;
> T> -
> T> -          /*
> T> -           * If we got a signal, don't risk going back to sleep.
> T> -           */
> T> -          if (error)
> T> -                  break;
> T> -  }
> T> -  if (sf != NULL) {
> T> -          SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
> T> -          sf->m = m;
> T> -          nsfbufsused++;
> T> -          nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -          pmap_qenter(sf->kva, &sf->m, 1);
> T> -  }
> T> -  mtx_unlock(&sf_freelist.sf_lock);
> T> -  return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Release resources back to the system.
> T> - */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -  pmap_qremove(sf->kva, 1);
> T> -  mtx_lock(&sf_freelist.sf_lock);
> T> -  SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
> T> -  nsfbufsused--;
> T> -  if (sf_buf_alloc_want > 0)
> T> -          wakeup(&sf_freelist);
> T> -  mtx_unlock(&sf_freelist.sf_lock);
> T> -}
> T> -#endif    /* !__mips_n64 */
> T> -
> T> -/*
> T>   * Software interrupt handler for queued VM system processing.
> T>   */
> T>  void
> T> Index: sys/powerpc/include/sf_buf.h
> T> ===================================================================
> T> --- sys/powerpc/include/sf_buf.h   (revision 268750)
> T> +++ sys/powerpc/include/sf_buf.h   (working copy)
> T> @@ -1,80 +0,0 @@
> T> -/*-
> T> - * Copyright (c) 2003 Alan L. Cox <a...@cs.rice.edu>
> T> - * All rights reserved.
> T> - *
> T> - * Redistribution and use in source and binary forms, with or without
> T> - * modification, are permitted provided that the following conditions
> T> - * are met:
> T> - * 1. Redistributions of source code must retain the above copyright
> T> - *    notice, this list of conditions and the following disclaimer.
> T> - * 2. Redistributions in binary form must reproduce the above copyright
> T> - *    notice, this list of conditions and the following disclaimer in the
> T> - *    documentation and/or other materials provided with the distribution.
> T> - *
> T> - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> - * SUCH DAMAGE.
> T> - *
> T> - * $FreeBSD$
> T> - */
> T> -
> T> -#ifndef _MACHINE_SF_BUF_H_
> T> -#define _MACHINE_SF_BUF_H_
> T> -
> T> -#include <vm/vm.h>
> T> -#include <vm/vm_param.h>
> T> -#include <vm/vm_page.h>
> T> -#include <machine/md_var.h>
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -  LIST_ENTRY(sf_buf) list_entry;  /* list of buffers */
> T> -  TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
> T> -  struct          vm_page *m;     /* currently mapped page */
> T> -  vm_offset_t     kva;            /* va of mapping */
> T> -  int             ref_count;      /* usage of this mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -/*
> T> - * On 32-bit OEA, the only purpose for which sf_buf is used is to implement
> T> - * an opaque pointer required by the machine-independent parts of the kernel.
> T> - * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T> - * actual mapping is provided by the direct virtual-to-physical mapping.  
> T> - *
> T> - * On OEA64 and Book-E, we need to do something a little more complicated. Use
> T> - * the runtime-detected hw_direct_map to pick between the two cases. Our
> T> - * friends in vm_machdep.c will do the same to ensure nothing gets confused.
> T> - */
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -  if (hw_direct_map)
> T> -          return (VM_PAGE_TO_PHYS((vm_page_t)sf));
> T> -
> T> -  return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -  if (hw_direct_map)
> T> -          return ((vm_page_t)sf);
> T> -
> T> -  return (sf->m);
> T> -}
> T> -
> T> -#endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/powerpc/include/vmparam.h
> T> ===================================================================
> T> --- sys/powerpc/include/vmparam.h  (revision 268750)
> T> +++ sys/powerpc/include/vmparam.h  (working copy)
> T> @@ -197,4 +197,18 @@ struct pmap_physseg {
> T>  
> T>  #define   ZERO_REGION_SIZE        (64 * 1024)     /* 64KB */
> T>  
> T> +/*
> T> + * On 32-bit OEA, the only purpose for which sf_buf is used is to implement
> T> + * an opaque pointer required by the machine-independent parts of the kernel.
> T> + * That pointer references the vm_page that is "mapped" by the sf_buf.  The
> T> + * actual mapping is provided by the direct virtual-to-physical mapping.
> T> + *
> T> + * On OEA64 and Book-E, we need to do something a little more complicated. Use
> T> + * the runtime-detected hw_direct_map to pick between the two cases. Our
> T> + * friends in vm_machdep.c will do the same to ensure nothing gets confused.
> T> + */
> T> +#define   SFBUF
> T> +#define   SFBUF_NOMD
> T> +#define   SFBUF_OPTIONAL_DIRECT_MAP       hw_direct_map
> T> + 
> T>  #endif /* _MACHINE_VMPARAM_H_ */
> T> Index: sys/powerpc/powerpc/vm_machdep.c
> T> ===================================================================
> T> --- sys/powerpc/powerpc/vm_machdep.c       (revision 268750)
> T> +++ sys/powerpc/powerpc/vm_machdep.c       (working copy)
> T> @@ -80,7 +80,6 @@
> T>  #include <sys/vmmeter.h>
> T>  #include <sys/kernel.h>
> T>  #include <sys/mbuf.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/sysent.h>
> T>  #include <sys/unistd.h>
> T> @@ -100,47 +99,6 @@
> T>  #include <vm/vm_map.h>
> T>  #include <vm/vm_extern.h>
> T>  
> T> -/*
> T> - * On systems without a direct mapped region (e.g. PPC64),
> T> - * we use the same code as the Book E implementation. Since
> T> - * we need to have runtime detection of this, define some machinery
> T> - * for sf_bufs in this case, and ignore it on systems with direct maps.
> T> - */
> T> -
> T> -#ifndef NSFBUFS
> T> -#define NSFBUFS           (512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> - 
> T> -LIST_HEAD(sf_head, sf_buf);
> T> - 
> T> -/* A hash table of active sendfile(2) buffers */
> T> -static struct sf_head *sf_buf_active;
> T> -static u_long sf_buf_hashmask;
> T> -
> T> -#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
> T> -
> T> -static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
> T> -static u_int sf_buf_alloc_want;
> T> -
> T> -/*
> T> - * A lock used to synchronize access to the hash table and free list
> T> - */
> T> -static struct mtx sf_buf_lock;
> T> -
> T>  #ifdef __powerpc64__
> T>  extern uintptr_t tocbase;
> T>  #endif
> T> @@ -245,124 +203,6 @@ cpu_exit(struct thread *td)
> T>  }
> T>  
> T>  /*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -  struct sf_buf *sf_bufs;
> T> -  vm_offset_t sf_base;
> T> -  int i;
> T> -
> T> -  /* Don't bother on systems with a direct map */
> T> -  if (hw_direct_map)
> T> -          return;
> T> -
> T> -  nsfbufs = NSFBUFS;
> T> -  TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -  sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
> T> -  TAILQ_INIT(&sf_buf_freelist);
> T> -  sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -  sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -      M_NOWAIT | M_ZERO);
> T> -
> T> -  for (i = 0; i < nsfbufs; i++) {
> T> -          sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -          TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
> T> -  }
> T> -  sf_buf_alloc_want = 0;
> T> -  mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist. Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -  struct sf_head *hash_list;
> T> -  struct sf_buf *sf;
> T> -  int error;
> T> -
> T> -  if (hw_direct_map) {
> T> -          /* Shortcut the direct mapped case */
> T> -          return ((struct sf_buf *)m);
> T> -  }
> T> -
> T> -  hash_list = &sf_buf_active[SF_BUF_HASH(m)];
> T> -  mtx_lock(&sf_buf_lock);
> T> -  LIST_FOREACH(sf, hash_list, list_entry) {
> T> -          if (sf->m == m) {
> T> -                  sf->ref_count++;
> T> -                  if (sf->ref_count == 1) {
> T> -                          TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -                          nsfbufsused++;
> T> -                          nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -                  }
> T> -                  goto done;
> T> -          }
> T> -  }
> T> -
> T> -  while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
> T> -          if (flags & SFB_NOWAIT)
> T> -                  goto done;
> T> -
> T> -          sf_buf_alloc_want++;
> T> -          SFSTAT_INC(sf_allocwait);
> T> -          error = msleep(&sf_buf_freelist, &sf_buf_lock,
> T> -              (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -          sf_buf_alloc_want--;
> T> -
> T> -          /*
> T> -           * If we got a signal, don't risk going back to sleep.
> T> -           */
> T> -          if (error)
> T> -                  goto done;
> T> -  }
> T> -
> T> -  TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
> T> -  if (sf->m != NULL)
> T> -          LIST_REMOVE(sf, list_entry);
> T> -
> T> -  LIST_INSERT_HEAD(hash_list, sf, list_entry);
> T> -  sf->ref_count = 1;
> T> -  sf->m = m;
> T> -  nsfbufsused++;
> T> -  nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -  pmap_qenter(sf->kva, &sf->m, 1);
> T> -done:
> T> -  mtx_unlock(&sf_buf_lock);
> T> -  return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Detach mapped page and release resources back to the system.
> T> - *
> T> - * Remove a reference from the given sf_buf, adding it to the free
> T> - * list when its reference count reaches zero. A freed sf_buf still,
> T> - * however, retains its virtual-to-physical mapping until it is
> T> - * recycled or reactivated by sf_buf_alloc(9).
> T> - */
> T> -void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -  if (hw_direct_map)
> T> -          return;
> T> -
> T> -  mtx_lock(&sf_buf_lock);
> T> -  sf->ref_count--;
> T> -  if (sf->ref_count == 0) {
> T> -          TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
> T> -          nsfbufsused--;
> T> -
> T> -          if (sf_buf_alloc_want > 0)
> T> -                  wakeup(&sf_buf_freelist);
> T> -  }
> T> -  mtx_unlock(&sf_buf_lock);
> T> -}
> T> -
> T> -/*
> T>   * Software interrupt handler for queued VM system processing.
> T>   */
> T>  void
> T> Index: sys/sparc64/include/sf_buf.h
> T> ===================================================================
> T> --- sys/sparc64/include/sf_buf.h   (revision 268750)
> T> +++ sys/sparc64/include/sf_buf.h   (working copy)
> T> @@ -1,59 +0,0 @@
> T> -/*-
> T> - * Copyright (c) 2003 Alan L. Cox <a...@cs.rice.edu>
> T> - * All rights reserved.
> T> - *
> T> - * Redistribution and use in source and binary forms, with or without
> T> - * modification, are permitted provided that the following conditions
> T> - * are met:
> T> - * 1. Redistributions of source code must retain the above copyright
> T> - *    notice, this list of conditions and the following disclaimer.
> T> - * 2. Redistributions in binary form must reproduce the above copyright
> T> - *    notice, this list of conditions and the following disclaimer in the
> T> - *    documentation and/or other materials provided with the distribution.
> T> - *
> T> - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> T> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> T> - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> T> - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> T> - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> T> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> T> - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> T> - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> T> - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> T> - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> T> - * SUCH DAMAGE.
> T> - *
> T> - * $FreeBSD$
> T> - */
> T> -
> T> -#ifndef _MACHINE_SF_BUF_H_
> T> -#define _MACHINE_SF_BUF_H_
> T> -
> T> -#include <sys/queue.h>
> T> -
> T> -struct vm_page;
> T> -
> T> -struct sf_buf {
> T> -  SLIST_ENTRY(sf_buf) free_list;  /* list of free buffer slots */
> T> -  struct          vm_page *m;     /* currently mapped page */
> T> -  vm_offset_t     kva;            /* va of mapping */
> T> -};
> T> -
> T> -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
> T> -void sf_buf_free(struct sf_buf *sf);
> T> -
> T> -static __inline vm_offset_t
> T> -sf_buf_kva(struct sf_buf *sf)
> T> -{
> T> -
> T> -  return (sf->kva);
> T> -}
> T> -
> T> -static __inline struct vm_page *
> T> -sf_buf_page(struct sf_buf *sf)
> T> -{
> T> -
> T> -  return (sf->m);
> T> -}
> T> -
> T> -#endif /* !_MACHINE_SF_BUF_H_ */
> T> Index: sys/sparc64/include/vmparam.h
> T> ===================================================================
> T> --- sys/sparc64/include/vmparam.h  (revision 268750)
> T> +++ sys/sparc64/include/vmparam.h  (working copy)
> T> @@ -239,4 +239,7 @@ extern vm_offset_t vm_max_kernel_address;
> T>   */
> T>  #define   ZERO_REGION_SIZE        PAGE_SIZE
> T>  
> T> +#define   SFBUF
> T> +#define   SFBUF_NOMD
> T> +
> T>  #endif /* !_MACHINE_VMPARAM_H_ */
> T> Index: sys/sparc64/sparc64/vm_machdep.c
> T> ===================================================================
> T> --- sys/sparc64/sparc64/vm_machdep.c       (revision 268750)
> T> +++ sys/sparc64/sparc64/vm_machdep.c       (working copy)
> T> @@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
> T>  #include <sys/mutex.h>
> T>  #include <sys/proc.h>
> T>  #include <sys/sysent.h>
> T> -#include <sys/sf_buf.h>
> T>  #include <sys/sched.h>
> T>  #include <sys/sysctl.h>
> T>  #include <sys/unistd.h>
> T> @@ -84,35 +83,6 @@ __FBSDID("$FreeBSD$");
> T>  #include <machine/tlb.h>
> T>  #include <machine/tstate.h>
> T>  
> T> -#ifndef NSFBUFS
> T> -#define   NSFBUFS         (512 + maxusers * 16)
> T> -#endif
> T> -
> T> -static int nsfbufs;
> T> -static int nsfbufspeak;
> T> -static int nsfbufsused;
> T> -
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
> T> -    "Maximum number of sendfile(2) sf_bufs available");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
> T> -    "Number of sendfile(2) sf_bufs at peak usage");
> T> -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
> T> -    "Number of sendfile(2) sf_bufs in use");
> T> -
> T> -static void       sf_buf_init(void *arg);
> T> -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
> T> -
> T> -/*
> T> - * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
> T> - * sf_freelist head with the sf_lock mutex.
> T> - */
> T> -static struct {
> T> -  SLIST_HEAD(, sf_buf) sf_head;
> T> -  struct mtx sf_lock;
> T> -} sf_freelist;
> T> -
> T> -static u_int      sf_buf_alloc_want;
> T> -
> T>  PMAP_STATS_VAR(uma_nsmall_alloc);
> T>  PMAP_STATS_VAR(uma_nsmall_alloc_oc);
> T>  PMAP_STATS_VAR(uma_nsmall_free);
> T> @@ -417,84 +387,7 @@ is_physical_memory(vm_paddr_t addr)
> T>    return (0);
> T>  }
> T>  
> T> -/*
> T> - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
> T> - */
> T> -static void
> T> -sf_buf_init(void *arg)
> T> -{
> T> -  struct sf_buf *sf_bufs;
> T> -  vm_offset_t sf_base;
> T> -  int i;
> T> -
> T> -  nsfbufs = NSFBUFS;
> T> -  TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
> T> -
> T> -  mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
> T> -  SLIST_INIT(&sf_freelist.sf_head);
> T> -  sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
> T> -  sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
> T> -      M_NOWAIT | M_ZERO);
> T> -  for (i = 0; i < nsfbufs; i++) {
> T> -          sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
> T> -          SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
> T> -  }
> T> -  sf_buf_alloc_want = 0;
> T> -}
> T> -
> T> -/*
> T> - * Get an sf_buf from the freelist.  Will block if none are available.
> T> - */
> T> -struct sf_buf *
> T> -sf_buf_alloc(struct vm_page *m, int flags)
> T> -{
> T> -  struct sf_buf *sf;
> T> -  int error;
> T> -
> T> -  mtx_lock(&sf_freelist.sf_lock);
> T> -  while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
> T> -          if (flags & SFB_NOWAIT)
> T> -                  break;
> T> -          sf_buf_alloc_want++;
> T> -          SFSTAT_INC(sf_allocwait);
> T> -          error = msleep(&sf_freelist, &sf_freelist.sf_lock,
> T> -              (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
> T> -          sf_buf_alloc_want--;
> T> -
> T> -          /*
> T> -           * If we got a signal, don't risk going back to sleep.
> T> -           */
> T> -          if (error)
> T> -                  break;
> T> -  }
> T> -  if (sf != NULL) {
> T> -          SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
> T> -          sf->m = m;
> T> -          nsfbufsused++;
> T> -          nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
> T> -          pmap_qenter(sf->kva, &sf->m, 1);
> T> -  }
> T> -  mtx_unlock(&sf_freelist.sf_lock);
> T> -  return (sf);
> T> -}
> T> -
> T> -/*
> T> - * Release resources back to the system.
> T> - */
> T>  void
> T> -sf_buf_free(struct sf_buf *sf)
> T> -{
> T> -
> T> -  pmap_qremove(sf->kva, 1);
> T> -  mtx_lock(&sf_freelist.sf_lock);
> T> -  SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
> T> -  nsfbufsused--;
> T> -  if (sf_buf_alloc_want > 0)
> T> -          wakeup(&sf_freelist);
> T> -  mtx_unlock(&sf_freelist.sf_lock);
> T> -}
> T> -
> T> -void
> T>  swi_vm(void *v)
> T>  {
> T>  
> T> Index: sys/sys/sf_buf.h
> T> ===================================================================
> T> --- sys/sys/sf_buf.h       (revision 268750)
> T> +++ sys/sys/sf_buf.h       (working copy)
> T> @@ -29,6 +29,114 @@
> T>  #ifndef _SYS_SF_BUF_H_
> T>  #define _SYS_SF_BUF_H_
> T>  
> T> +struct sfstat {                           /* sendfile statistics */
> T> +  uint64_t        sf_iocnt;       /* times sendfile had to do disk I/O */
> T> +  uint64_t        sf_allocfail;   /* times sfbuf allocation failed */
> T> +  uint64_t        sf_allocwait;   /* times sfbuf allocation had to wait */
> T> +};
> T> +
> T> +#ifdef _KERNEL
> T> +#include <sys/types.h>
> T> +#include <sys/systm.h>
> T> +#include <sys/counter.h>
> T> +#include <vm/vm.h>
> T> +#include <vm/vm_param.h>
> T> +#include <vm/vm_page.h>
> T> +
> T> +#ifdef SFBUF
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +#include <sys/_cpuset.h>
> T> +#endif
> T> +#include <sys/queue.h>
> T> +
> T> +struct sf_buf {
> T> +  LIST_ENTRY(sf_buf)      list_entry;     /* list of buffers */
> T> +  TAILQ_ENTRY(sf_buf)     free_entry;     /* list of buffers */
> T> +  vm_page_t               m;              /* currently mapped page */
> T> +  vm_offset_t             kva;            /* va of mapping */
> T> +  int                     ref_count;      /* usage of this mapping */
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +  cpuset_t                cpumask;        /* where mapping is valid */
> T> +#endif
> T> +};
> T> +#else /* ! SFBUF */
> T> +struct sf_buf;
> T> +#endif /* SFBUF */
> T> +
> T> +#ifndef SFBUF_NOMD
> T> +#include <machine/sf_buf.h>
> T> +#endif
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +#include <machine/md_var.h>
> T> +#endif
> T> +
> T> +#ifdef SFBUF
> T> +struct sf_buf *sf_buf_alloc(struct vm_page *, int);
> T> +void sf_buf_free(struct sf_buf *);
> T> +
> T> +static inline vm_offset_t
> T> +sf_buf_kva(struct sf_buf *sf)
> T> +{
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +  if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +          return (VM_PAGE_TO_PHYS((vm_page_t)sf));
> T> +#endif
> T> +
> T> +        return (sf->kva);
> T> +}
> T> +
> T> +static inline vm_page_t
> T> +sf_buf_page(struct sf_buf *sf)
> T> +{
> T> +#ifdef SFBUF_OPTIONAL_DIRECT_MAP
> T> +  if (SFBUF_OPTIONAL_DIRECT_MAP)
> T> +          return ((vm_page_t)sf);
> T> +#endif
> T> +
> T> +        return (sf->m);
> T> +}
> T> +
> T> +#ifndef SFBUF_MAP
> T> +#include <vm/pmap.h>
> T> +
> T> +static inline void
> T> +sf_buf_map(struct sf_buf *sf, int flags)
> T> +{
> T> +
> T> +  pmap_qenter(sf->kva, &sf->m, 1);
> T> +}
> T> +
> T> +static inline int
> T> +sf_buf_unmap(struct sf_buf *sf)
> T> +{
> T> +
> T> +  return (0);
> T> +}
> T> +#endif /* SFBUF_MAP */
> T> +
> T> +#if defined(SMP) && defined(SFBUF_CPUSET)
> T> +void sf_buf_shootdown(struct sf_buf *, int);
> T> +#endif
> T> +
> T> +#ifdef SFBUF_PROCESS_PAGE
> T> +boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *));
> T> +#endif
> T> +
> T> +#else /* ! SFBUF */
> T> +
> T> +static inline struct sf_buf *
> T> +sf_buf_alloc(struct vm_page *m, int pri)
> T> +{
> T> +
> T> +  return ((struct sf_buf *)m);
> T> +}
> T> +
> T> +static inline void
> T> +sf_buf_free(struct sf_buf *sf)
> T> +{
> T> +}
> T> +#endif /* SFBUF */
> T> +
> T>  /*
> T>   * Options to sf_buf_alloc() are specified through its flags argument.  This
> T>   * argument's value should be the result of a bitwise or'ing of one or more
> T> @@ -40,19 +148,6 @@
> T>  #define   SFB_DEFAULT     0
> T>  #define   SFB_NOWAIT      4               /* Return NULL if all bufs are used. */
> T>  
> T> -struct vm_page;
> T> -
> T> -struct sfstat {                           /* sendfile statistics */
> T> -  uint64_t        sf_iocnt;       /* times sendfile had to do disk I/O */
> T> -  uint64_t        sf_allocfail;   /* times sfbuf allocation failed */
> T> -  uint64_t        sf_allocwait;   /* times sfbuf allocation had to wait */
> T> -};
> T> -
> T> -#ifdef _KERNEL
> T> -#include <machine/sf_buf.h>
> T> -#include <sys/systm.h>
> T> -#include <sys/counter.h>
> T> -
> T>  extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
> T>  #define   SFSTAT_ADD(name, val)   \
> T>      counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\
> 
> 
> 
> -- 
> Totus tuus, Glebius.
> 
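
For reference while testing, here is a minimal sketch (not part of Gleb's patch)
of how machine-independent code would consume the sf_buf KPI declared in the
patched sys/sf_buf.h. It only uses the functions and flags visible in the diff
(sf_buf_alloc(), sf_buf_kva(), sf_buf_free(), SFB_NOWAIT); the helper name
copy_page_out() is made up for illustration.

/*
 * Hypothetical consumer of the MI sf_buf KPI: copy one vm_page out
 * through a temporary sf_buf mapping.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sf_buf.h>

static int
copy_page_out(vm_page_t m, void *dst)
{
	struct sf_buf *sf;

	/* SFB_NOWAIT: return NULL instead of sleeping if no buf is free. */
	sf = sf_buf_alloc(m, SFB_NOWAIT);
	if (sf == NULL)
		return (EAGAIN);

	/* sf_buf_kva() hides whether the arch maps the page or has a direct map. */
	bcopy((void *)sf_buf_kva(sf), dst, PAGE_SIZE);

	/* Release the mapping; the MI code handles the freelist bookkeeping. */
	sf_buf_free(sf);
	return (0);
}

Whether sf_buf_kva() goes through an sf_buf freelist or a direct map is now
decided by the SFBUF/SFBUF_NOMD knobs in each arch's vmparam.h (as the sparc64
hunk above shows), so callers like this stay unchanged across platforms.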

_______________________________________________
freebsd-current@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"
