On Thu, Oct 19, 2017 at 01:30:15PM -0400, Mathieu Desnoyers wrote:
> [ This patch is sent directly to Linus, because it needs to be merged
>   before the end of 4.14 rc cycle. It introduces a "register private
>   expedited" membarrier command which allows eventual removal of
>   important memory barrier constraints on the scheduler fast-paths. It
>   changes how the "private expedited" membarrier command (new to 4.14)
>   is used from user-space. Sorry to send this late in the cycle. ]
> 
> Provide a command allowing processes to register their intent to use
> the private expedited command. This affects how the expedited private
> command introduced in 4.14-rc is meant to be used, and should be merged
> before 4.14 final.
> 
> Processes are now required to register before using
> MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM.
> 
> This fixes a problem that arose when designing requested extensions to
> sys_membarrier() to allow JITs to efficiently flush old code from
> instruction caches.  Several potential algorithms are much less painful
> if the user registers its intent to use this functionality early on, for
> example, before the process spawns the second thread.  Registering at
> this time removes the need to interrupt each and every thread in that
> process at the first expedited sys_membarrier() system call.
> 
> Signed-off-by: Mathieu Desnoyers <[email protected]>
> CC: Paul E. McKenney <[email protected]>

This looks much less intrusive than the earlier series!

Acked-by: Paul E. McKenney <[email protected]>

> CC: Peter Zijlstra <[email protected]>
> CC: Ingo Molnar <[email protected]>
> CC: Alexander Viro <[email protected]>
> CC: Linus Torvalds <[email protected]>
> ---
>  fs/exec.c                       |  1 +
>  include/linux/mm_types.h        |  3 +++
>  include/linux/sched/mm.h        | 16 ++++++++++++++++
>  include/uapi/linux/membarrier.h | 23 ++++++++++++++++-------
>  kernel/sched/membarrier.c       | 34 ++++++++++++++++++++++++++++++----
>  5 files changed, 66 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 5470d3c1892a..3e14ba25f678 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename,
>       /* execve succeeded */
>       current->fs->in_exec = 0;
>       current->in_execve = 0;
> +     membarrier_execve(current);
>       acct_update_integrals(current);
>       task_numa_free(current);
>       free_bprm(bprm);
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 46f4ecf5479a..1861ea8dba77 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -445,6 +445,9 @@ struct mm_struct {
>       unsigned long flags; /* Must use atomic bitops to access the bits */
> 
>       struct core_state *core_state; /* coredumping support */
> +#ifdef CONFIG_MEMBARRIER
> +     atomic_t membarrier_state;
> +#endif
>  #ifdef CONFIG_AIO
>       spinlock_t                      ioctx_lock;
>       struct kioctx_table __rcu       *ioctx_table;
> diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
> index ae53e413fb13..ab9bf7b73954 100644
> --- a/include/linux/sched/mm.h
> +++ b/include/linux/sched/mm.h
> @@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
>       current->flags = (current->flags & ~PF_MEMALLOC) | flags;
>  }
> 
> +#ifdef CONFIG_MEMBARRIER
> +enum {
> +     MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY        = (1U << 0),
> +     MEMBARRIER_STATE_SWITCH_MM                      = (1U << 1),
> +};
> +
> +static inline void membarrier_execve(struct task_struct *t)
> +{
> +     atomic_set(&t->mm->membarrier_state, 0);
> +}
> +#else
> +static inline void membarrier_execve(struct task_struct *t)
> +{
> +}
> +#endif
> +
>  #endif /* _LINUX_SCHED_MM_H */
> diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
> index 6d47b3249d8a..4e01ad7ffe98 100644
> --- a/include/uapi/linux/membarrier.h
> +++ b/include/uapi/linux/membarrier.h
> @@ -52,21 +52,30 @@
>   *                          (non-running threads are de facto in such a
>   *                          state). This only covers threads from the
>   *                          same processes as the caller thread. This
> - *                          command returns 0. The "expedited" commands
> - *                          complete faster than the non-expedited ones,
> - *                          they never block, but have the downside of
> - *                          causing extra overhead.
> + *                          command returns 0 on success. The
> + *                          "expedited" commands complete faster than
> + *                          the non-expedited ones, they never block,
> + *                          but have the downside of causing extra
> + *                          overhead. A process needs to register its
> + *                          intent to use the private expedited command
> + *                          prior to using it, otherwise this command
> + *                          returns -EPERM.
> + * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
> + *                          Register the process intent to use
> + *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
> + *                          returns 0.
>   *
>   * Command to be passed to the membarrier system call. The commands need to
>   * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
>   * the value 0.
>   */
>  enum membarrier_cmd {
> -     MEMBARRIER_CMD_QUERY                    = 0,
> -     MEMBARRIER_CMD_SHARED                   = (1 << 0),
> +     MEMBARRIER_CMD_QUERY                            = 0,
> +     MEMBARRIER_CMD_SHARED                           = (1 << 0),
>       /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
>       /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
> -     MEMBARRIER_CMD_PRIVATE_EXPEDITED        = (1 << 3),
> +     MEMBARRIER_CMD_PRIVATE_EXPEDITED                = (1 << 3),
> +     MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED       = (1 << 4),
>  };
> 
>  #endif /* _UAPI_LINUX_MEMBARRIER_H */
> diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
> index a92fddc22747..dd7908743dab 100644
> --- a/kernel/sched/membarrier.c
> +++ b/kernel/sched/membarrier.c
> @@ -18,6 +18,7 @@
>  #include <linux/membarrier.h>
>  #include <linux/tick.h>
>  #include <linux/cpumask.h>
> +#include <linux/atomic.h>
> 
>  #include "sched.h"   /* for cpu_rq(). */
> 
> @@ -26,21 +27,26 @@
>   * except MEMBARRIER_CMD_QUERY.
>   */
>  #define MEMBARRIER_CMD_BITMASK       \
> -     (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
> +     (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED       \
> +     | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
> 
>  static void ipi_mb(void *info)
>  {
>       smp_mb();       /* IPIs should be serializing but paranoid. */
>  }
> 
> -static void membarrier_private_expedited(void)
> +static int membarrier_private_expedited(void)
>  {
>       int cpu;
>       bool fallback = false;
>       cpumask_var_t tmpmask;
> 
> +     if (!(atomic_read(&current->mm->membarrier_state)
> +                     & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
> +             return -EPERM;
> +
>       if (num_online_cpus() == 1)
> -             return;
> +             return 0;
> 
>       /*
>        * Matches memory barriers around rq->curr modification in
> @@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
>        * rq->curr modification in scheduler.
>        */
>       smp_mb();       /* exit from system call is not a mb */
> +     return 0;
> +}
> +
> +static void membarrier_register_private_expedited(void)
> +{
> +     struct task_struct *p = current;
> +     struct mm_struct *mm = p->mm;
> +
> +     /*
> +      * We need to consider threads belonging to different thread
> +      * groups, which use the same mm. (CLONE_VM but not
> +      * CLONE_THREAD).
> +      */
> +     if (atomic_read(&mm->membarrier_state)
> +                     & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
> +             return;
> +     atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
> +                     &mm->membarrier_state);
>  }
> 
>  /**
> @@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
>                       synchronize_sched();
>               return 0;
>       case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
> -             membarrier_private_expedited();
> +             return membarrier_private_expedited();
> +     case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
> +             membarrier_register_private_expedited();
>               return 0;
>       default:
>               return -EINVAL;
> -- 
> 2.11.0
> 

Reply via email to