diff --git a/Documentation/device-mapper/dm-integrity.txt 
b/Documentation/device-mapper/dm-integrity.txt
index 297251b0d2d5..bf6af2ade0a6 100644
--- a/Documentation/device-mapper/dm-integrity.txt
+++ b/Documentation/device-mapper/dm-integrity.txt
@@ -146,6 +146,13 @@ block_size:number
        Supported values are 512, 1024, 2048 and 4096 bytes.  If not
        specified the default block size is 512 bytes.
 
+legacy_recalculate
+       Allow recalculating of volumes with HMAC keys. This is disabled by
+       default for security reasons - an attacker could modify the volume,
+       set recalc_sector to zero, and the kernel would not detect the
+       modification.
+
+
 The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
 be changed when reloading the target (load an inactive table and swap the
 tables with suspend and resume). The other arguments should not be changed
diff --git a/Makefile b/Makefile
index 335b015c5c9b..7da0ddd65052 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
 PATCHLEVEL = 19
-SUBLEVEL = 171
+SUBLEVEL = 172
 EXTRAVERSION =
 NAME = "People's Front"
 
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 3b78dcda4736..874caed72390 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -650,9 +650,8 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
 
        spin_lock_irqsave(&mvpwm->lock, flags);
 
-       val = (unsigned long long)
-               readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
-       val *= NSEC_PER_SEC;
+       u = readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
+       val = (unsigned long long) u * NSEC_PER_SEC;
        do_div(val, mvpwm->clk_rate);
        if (val > UINT_MAX)
                state->duty_cycle = UINT_MAX;
@@ -661,21 +660,17 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
        else
                state->duty_cycle = 1;
 
-       val = (unsigned long long)
-               readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
+       val = (unsigned long long) u; /* on duration */
+       /* period = on + off duration */
+       val += readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
        val *= NSEC_PER_SEC;
        do_div(val, mvpwm->clk_rate);
-       if (val < state->duty_cycle) {
+       if (val > UINT_MAX)
+               state->period = UINT_MAX;
+       else if (val)
+               state->period = val;
+       else
                state->period = 1;
-       } else {
-               val -= state->duty_cycle;
-               if (val > UINT_MAX)
-                       state->period = UINT_MAX;
-               else if (val)
-                       state->period = val;
-               else
-                       state->period = 1;
-       }
 
        regmap_read(mvchip->regs, GPIO_BLINK_EN_OFF + mvchip->offset, &u);
        if (u)
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 523014f2c0eb..8006732b8f42 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -150,9 +150,9 @@ static int wacom_wac_pen_serial_enforce(struct hid_device 
*hdev,
        }
 
        if (flush)
-               wacom_wac_queue_flush(hdev, &wacom_wac->pen_fifo);
+               wacom_wac_queue_flush(hdev, wacom_wac->pen_fifo);
        else if (insert)
-               wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo,
+               wacom_wac_queue_insert(hdev, wacom_wac->pen_fifo,
                                       raw_data, report_size);
 
        return insert && !flush;
@@ -1251,7 +1251,7 @@ static void wacom_devm_kfifo_release(struct device *dev, 
void *res)
 static int wacom_devm_kfifo_alloc(struct wacom *wacom)
 {
        struct wacom_wac *wacom_wac = &wacom->wacom_wac;
-       struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo;
+       struct kfifo_rec_ptr_2 *pen_fifo;
        int error;
 
        pen_fifo = devres_alloc(wacom_devm_kfifo_release,
@@ -1268,6 +1268,7 @@ static int wacom_devm_kfifo_alloc(struct wacom *wacom)
        }
 
        devres_add(&wacom->hdev->dev, pen_fifo);
+       wacom_wac->pen_fifo = pen_fifo;
 
        return 0;
 }
diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index f67d871841c0..46da97162ef4 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h
@@ -344,7 +344,7 @@ struct wacom_wac {
        struct input_dev *pen_input;
        struct input_dev *touch_input;
        struct input_dev *pad_input;
-       struct kfifo_rec_ptr_2 pen_fifo;
+       struct kfifo_rec_ptr_2 *pen_fifo;
        int pid;
        int num_contacts_left;
        u8 bt_features;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 1917051b512f..cffd42317272 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -240,6 +240,7 @@ struct dm_integrity_c {
 
        bool journal_uptodate;
        bool just_formatted;
+       bool legacy_recalculate;
 
        struct alg_spec internal_hash_alg;
        struct alg_spec journal_crypt_alg;
@@ -345,6 +346,14 @@ static int dm_integrity_failed(struct dm_integrity_c *ic)
        return READ_ONCE(ic->failed);
 }
 
+static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
+{
+       /* A key on the internal hash or the journal MAC blocks recalculation. */
+       bool keyed = ic->internal_hash_alg.key || ic->journal_mac_alg.key;
+
+       return keyed && !ic->legacy_recalculate;
+}
+
 static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned 
i,
                                          unsigned j, unsigned char seq)
 {
@@ -2503,6 +2512,7 @@ static void dm_integrity_status(struct dm_target *ti, 
status_type_t type,
                arg_count += !!ic->internal_hash_alg.alg_string;
                arg_count += !!ic->journal_crypt_alg.alg_string;
                arg_count += !!ic->journal_mac_alg.alg_string;
+               arg_count += ic->legacy_recalculate;
                DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long 
long)ic->start,
                       ic->tag_size, ic->mode, arg_count);
                if (ic->meta_dev)
@@ -2516,6 +2526,8 @@ static void dm_integrity_status(struct dm_target *ti, 
status_type_t type,
                DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
                DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
                DMEMIT(" commit_time:%u", ic->autocommit_msec);
+               if (ic->legacy_recalculate)
+                       DMEMIT(" legacy_recalculate");
 
 #define EMIT_ALG(a, n)                                                 \
                do {                                                    \
@@ -3118,7 +3130,7 @@ static int dm_integrity_ctr(struct dm_target *ti, 
unsigned argc, char **argv)
        unsigned extra_args;
        struct dm_arg_set as;
        static const struct dm_arg _args[] = {
-               {0, 15, "Invalid number of feature args"},
+               {0, 12, "Invalid number of feature args"},
        };
        unsigned journal_sectors, interleave_sectors, buffer_sectors, 
journal_watermark, sync_msec;
        bool recalculate;
@@ -3248,6 +3260,8 @@ static int dm_integrity_ctr(struct dm_target *ti, 
unsigned argc, char **argv)
                                goto bad;
                } else if (!strcmp(opt_string, "recalculate")) {
                        recalculate = true;
+               } else if (!strcmp(opt_string, "legacy_recalculate")) {
+                       ic->legacy_recalculate = true;
                } else {
                        r = -EINVAL;
                        ti->error = "Invalid argument";
@@ -3523,6 +3537,14 @@ static int dm_integrity_ctr(struct dm_target *ti, 
unsigned argc, char **argv)
                }
        }
 
+       if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+           le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
+           dm_integrity_disable_recalculate(ic)) {
+               ti->error = "Recalculating with HMAC is disabled for security 
reasons - if you really need it, use the argument \"legacy_recalculate\"";
+               r = -EOPNOTSUPP;
+               goto bad;
+       }
+
        ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : 
ic->dev->bdev,
                        1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, 
NULL, NULL);
        if (IS_ERR(ic->bufio)) {
diff --git a/fs/exec.c b/fs/exec.c
index 52788644c4af..6eea921a7e72 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1011,7 +1011,7 @@ static int exec_mmap(struct mm_struct *mm)
        /* Notify parent that we're no longer interested in the old VM */
        tsk = current;
        old_mm = current->mm;
-       mm_release(tsk, old_mm);
+       exec_mm_release(tsk, old_mm);
 
        if (old_mm) {
                sync_mm_rss(old_mm);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b2a9c746f8ce..edeb837081c8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5209,7 +5209,7 @@ static int other_inode_match(struct inode * inode, 
unsigned long ino,
            (inode->i_state & I_DIRTY_TIME)) {
                struct ext4_inode_info  *ei = EXT4_I(inode);
 
-               inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+               inode->i_state &= ~I_DIRTY_TIME;
                spin_unlock(&inode->i_lock);
 
                spin_lock(&ei->i_raw_lock);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 15216b440880..f2d0c4acb3cb 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1157,7 +1157,7 @@ static bool inode_dirtied_after(struct inode *inode, 
unsigned long t)
  */
 static int move_expired_inodes(struct list_head *delaying_queue,
                               struct list_head *dispatch_queue,
-                              int flags, unsigned long dirtied_before)
+                              unsigned long dirtied_before)
 {
        LIST_HEAD(tmp);
        struct list_head *pos, *node;
@@ -1173,8 +1173,6 @@ static int move_expired_inodes(struct list_head 
*delaying_queue,
                list_move(&inode->i_io_list, &tmp);
                moved++;
                spin_lock(&inode->i_lock);
-               if (flags & EXPIRE_DIRTY_ATIME)
-                       inode->i_state |= I_DIRTY_TIME_EXPIRED;
                inode->i_state |= I_SYNC_QUEUED;
                spin_unlock(&inode->i_lock);
                if (sb_is_blkdev_sb(inode->i_sb))
@@ -1222,11 +1220,11 @@ static void queue_io(struct bdi_writeback *wb, struct 
wb_writeback_work *work,
 
        assert_spin_locked(&wb->list_lock);
        list_splice_init(&wb->b_more_io, &wb->b_io);
-       moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before);
+       moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
        if (!work->for_sync)
                time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
        moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
-                                    EXPIRE_DIRTY_ATIME, time_expire_jif);
+                                    time_expire_jif);
        if (moved)
                wb_io_lists_populated(wb);
        trace_writeback_queue_io(wb, work, dirtied_before, moved);
@@ -1394,26 +1392,26 @@ __writeback_single_inode(struct inode *inode, struct 
writeback_control *wbc)
                        ret = err;
        }
 
+       /*
+        * If the inode has dirty timestamps and we need to write them, call
+        * mark_inode_dirty_sync() to notify the filesystem about it and to
+        * change I_DIRTY_TIME into I_DIRTY_SYNC.
+        */
+       if ((inode->i_state & I_DIRTY_TIME) &&
+           (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
+            time_after(jiffies, inode->dirtied_time_when +
+                       dirtytime_expire_interval * HZ))) {
+               trace_writeback_lazytime(inode);
+               mark_inode_dirty_sync(inode);
+       }
+
        /*
         * Some filesystems may redirty the inode during the writeback
         * due to delalloc, clear dirty metadata flags right before
         * write_inode()
         */
        spin_lock(&inode->i_lock);
-
        dirty = inode->i_state & I_DIRTY;
-       if (inode->i_state & I_DIRTY_TIME) {
-               if ((dirty & I_DIRTY_INODE) ||
-                   wbc->sync_mode == WB_SYNC_ALL ||
-                   unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
-                   unlikely(time_after(jiffies,
-                                       (inode->dirtied_time_when +
-                                        dirtytime_expire_interval * HZ)))) {
-                       dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
-                       trace_writeback_lazytime(inode);
-               }
-       } else
-               inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
        inode->i_state &= ~dirty;
 
        /*
@@ -1434,8 +1432,6 @@ __writeback_single_inode(struct inode *inode, struct 
writeback_control *wbc)
 
        spin_unlock(&inode->i_lock);
 
-       if (dirty & I_DIRTY_TIME)
-               mark_inode_dirty_sync(inode);
        /* Don't write the inode if only I_DIRTY_PAGES was set */
        if (dirty & ~I_DIRTY_PAGES) {
                int err = write_inode(inode, wbc);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index ae453dd236a6..6fcdf7e449fe 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -99,9 +99,9 @@ xfs_trans_log_inode(
         * to log the timestamps, or will clear already cleared fields in the
         * worst case.
         */
-       if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
+       if (inode->i_state & I_DIRTY_TIME) {
                spin_lock(&inode->i_lock);
-               inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+               inode->i_state &= ~I_DIRTY_TIME;
                spin_unlock(&inode->i_lock);
        }
 
diff --git a/include/linux/compat.h b/include/linux/compat.h
index de0c13bdcd2c..189d0e111d57 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -445,8 +445,6 @@ struct compat_kexec_segment;
 struct compat_mq_attr;
 struct compat_msgbuf;
 
-extern void compat_exit_robust_list(struct task_struct *curr);
-
 #define BITS_PER_COMPAT_LONG    (8*sizeof(compat_long_t))
 
 #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 876bfb6df06a..b6a955ba6173 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2071,7 +2071,6 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, 
struct file *filp)
 #define I_DIO_WAKEUP           (1 << __I_DIO_WAKEUP)
 #define I_LINKABLE             (1 << 10)
 #define I_DIRTY_TIME           (1 << 11)
-#define I_DIRTY_TIME_EXPIRED   (1 << 12)
 #define I_WB_SWITCH            (1 << 13)
 #define I_OVL_INUSE            (1 << 14)
 #define I_CREATING             (1 << 15)
diff --git a/include/linux/futex.h b/include/linux/futex.h
index a61bf436dcf3..b70df27d7e85 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -2,7 +2,9 @@
 #ifndef _LINUX_FUTEX_H
 #define _LINUX_FUTEX_H
 
+#include <linux/sched.h>
 #include <linux/ktime.h>
+
 #include <uapi/linux/futex.h>
 
 struct inode;
@@ -51,15 +53,35 @@ union futex_key {
 #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
 
 #ifdef CONFIG_FUTEX
-extern void exit_robust_list(struct task_struct *curr);
+enum {
+       FUTEX_STATE_OK,
+       FUTEX_STATE_EXITING,
+       FUTEX_STATE_DEAD,
+};
 
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-             u32 __user *uaddr2, u32 val2, u32 val3);
-#else
-static inline void exit_robust_list(struct task_struct *curr)
+static inline void futex_init_task(struct task_struct *tsk)
 {
+       tsk->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+       tsk->compat_robust_list = NULL;
+#endif
+       INIT_LIST_HEAD(&tsk->pi_state_list);
+       tsk->pi_state_cache = NULL;
+       tsk->futex_state = FUTEX_STATE_OK;
+       mutex_init(&tsk->futex_exit_mutex);
 }
 
+void futex_exit_recursive(struct task_struct *tsk);
+void futex_exit_release(struct task_struct *tsk);
+void futex_exec_release(struct task_struct *tsk);
+
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+             u32 __user *uaddr2, u32 val2, u32 val3);
+#else
+static inline void futex_init_task(struct task_struct *tsk) { }
+static inline void futex_exit_recursive(struct task_struct *tsk) { }
+static inline void futex_exit_release(struct task_struct *tsk) { }
+static inline void futex_exec_release(struct task_struct *tsk) { }
 static inline long do_futex(u32 __user *uaddr, int op, u32 val,
                            ktime_t *timeout, u32 __user *uaddr2,
                            u32 val2, u32 val3)
@@ -68,12 +90,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 
val,
 }
 #endif
 
-#ifdef CONFIG_FUTEX_PI
-extern void exit_pi_state_list(struct task_struct *curr);
-#else
-static inline void exit_pi_state_list(struct task_struct *curr)
-{
-}
-#endif
-
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c69f308f3a53..5524cd5c6abe 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -996,6 +996,8 @@ struct task_struct {
 #endif
        struct list_head                pi_state_list;
        struct futex_pi_state           *pi_state_cache;
+       struct mutex                    futex_exit_mutex;
+       unsigned int                    futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
        struct perf_event_context       *perf_event_ctxp[perf_nr_task_contexts];
@@ -1377,7 +1379,6 @@ extern struct pid *cad_pid;
  */
 #define PF_IDLE                        0x00000002      /* I am an IDLE thread 
*/
 #define PF_EXITING             0x00000004      /* Getting shut down */
-#define PF_EXITPIDONE          0x00000008      /* PI exit done on shut down */
 #define PF_VCPU                        0x00000010      /* I'm a virtual CPU */
 #define PF_WQ_WORKER           0x00000020      /* I'm a workqueue worker */
 #define PF_FORKNOEXEC          0x00000040      /* Forked but didn't exec */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 766bbe813861..8d3b7e731b74 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -119,8 +119,10 @@ extern struct mm_struct *get_task_mm(struct task_struct 
*task);
  * succeeds.
  */
 extern struct mm_struct *mm_access(struct task_struct *task, unsigned int 
mode);
-/* Remove the current tasks stale references to the old mm_struct */
-extern void mm_release(struct task_struct *, struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct on exit() */
+extern void exit_mm_release(struct task_struct *, struct mm_struct *);
+/* Remove the current tasks stale references to the old mm_struct on exec() */
+extern void exec_mm_release(struct task_struct *, struct mm_struct *);
 
 #ifdef CONFIG_MEMCG
 extern void mm_update_next_owner(struct mm_struct *mm);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 29d09755e5cf..146e7b3faa85 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -20,7 +20,6 @@
                {I_CLEAR,               "I_CLEAR"},             \
                {I_SYNC,                "I_SYNC"},              \
                {I_DIRTY_TIME,          "I_DIRTY_TIME"},        \
-               {I_DIRTY_TIME_EXPIRED,  "I_DIRTY_TIME_EXPIRED"}, \
                {I_REFERENCED,          "I_REFERENCED"}         \
        )
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 65133ebddfad..908e7a33e1fc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -498,7 +498,7 @@ static void exit_mm(void)
        struct mm_struct *mm = current->mm;
        struct core_state *core_state;
 
-       mm_release(current, mm);
+       exit_mm_release(current, mm);
        if (!mm)
                return;
        sync_mm_rss(mm);
@@ -818,32 +818,12 @@ void __noreturn do_exit(long code)
         */
        if (unlikely(tsk->flags & PF_EXITING)) {
                pr_alert("Fixing recursive fault but reboot is needed!\n");
-               /*
-                * We can do this unlocked here. The futex code uses
-                * this flag just to verify whether the pi state
-                * cleanup has been done or not. In the worst case it
-                * loops once more. We pretend that the cleanup was
-                * done as there is no way to return. Either the
-                * OWNER_DIED bit is set by now or we push the blocked
-                * task into the wait for ever nirwana as well.
-                */
-               tsk->flags |= PF_EXITPIDONE;
+               futex_exit_recursive(tsk);
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule();
        }
 
        exit_signals(tsk);  /* sets PF_EXITING */
-       /*
-        * Ensure that all new tsk->pi_lock acquisitions must observe
-        * PF_EXITING. Serializes against futex.c:attach_to_pi_owner().
-        */
-       smp_mb();
-       /*
-        * Ensure that we must observe the pi_state in exit_mm() ->
-        * mm_release() -> exit_pi_state_list().
-        */
-       raw_spin_lock_irq(&tsk->pi_lock);
-       raw_spin_unlock_irq(&tsk->pi_lock);
 
        /* sync mm's RSS info before statistics gathering */
        if (tsk->mm)
@@ -918,12 +898,6 @@ void __noreturn do_exit(long code)
         * Make sure we are holding no locks:
         */
        debug_check_no_locks_held();
-       /*
-        * We can do this unlocked here. The futex code uses this flag
-        * just to verify whether the pi state cleanup has been done
-        * or not. In the worst case it loops once more.
-        */
-       tsk->flags |= PF_EXITPIDONE;
 
        if (tsk->io_context)
                exit_io_context(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index f2c92c100194..cf535b9d5db7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1217,24 +1217,8 @@ static int wait_for_vfork_done(struct task_struct *child,
  * restoring the old one. . .
  * Eric Biederman 10 January 1998
  */
-void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
-       /* Get rid of any futexes when releasing the mm */
-#ifdef CONFIG_FUTEX
-       if (unlikely(tsk->robust_list)) {
-               exit_robust_list(tsk);
-               tsk->robust_list = NULL;
-       }
-#ifdef CONFIG_COMPAT
-       if (unlikely(tsk->compat_robust_list)) {
-               compat_exit_robust_list(tsk);
-               tsk->compat_robust_list = NULL;
-       }
-#endif
-       if (unlikely(!list_empty(&tsk->pi_state_list)))
-               exit_pi_state_list(tsk);
-#endif
-
        uprobe_free_utask(tsk);
 
        /* Get rid of any cached register state */
@@ -1267,6 +1251,18 @@ void mm_release(struct task_struct *tsk, struct 
mm_struct *mm)
                complete_vfork_done(tsk);
 }
 
+void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+       futex_exit_release(tsk);
+       mm_release(tsk, mm);
+}
+
+void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+       futex_exec_release(tsk);
+       mm_release(tsk, mm);
+}
+
 /*
  * Allocate a new mm structure and copy contents from the
  * mm structure of the passed in task structure.
@@ -1937,14 +1933,8 @@ static __latent_entropy struct task_struct *copy_process(
 #ifdef CONFIG_BLOCK
        p->plug = NULL;
 #endif
-#ifdef CONFIG_FUTEX
-       p->robust_list = NULL;
-#ifdef CONFIG_COMPAT
-       p->compat_robust_list = NULL;
-#endif
-       INIT_LIST_HEAD(&p->pi_state_list);
-       p->pi_state_cache = NULL;
-#endif
+       futex_init_task(p);
+
        /*
         * sigaltstack should be cleared when sharing the same VM
         */
diff --git a/kernel/futex.c b/kernel/futex.c
index 334dc4cae780..224adcdac6c1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -341,6 +341,12 @@ static inline bool should_fail_futex(bool fshared)
 }
 #endif /* CONFIG_FAIL_FUTEX */
 
+#ifdef CONFIG_COMPAT
+static void compat_exit_robust_list(struct task_struct *curr);
+#else
+static inline void compat_exit_robust_list(struct task_struct *curr) { }
+#endif
+
 static inline void futex_get_mm(union futex_key *key)
 {
        mmgrab(key->private.mm);
@@ -833,6 +839,29 @@ static struct futex_pi_state *alloc_pi_state(void)
        return pi_state;
 }
 
+static void pi_state_update_owner(struct futex_pi_state *pi_state,
+                                 struct task_struct *new_owner)
+{
+       struct task_struct *old_owner = pi_state->owner;
+
+       lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
+
+       if (old_owner) {        /* unlink under the old owner's pi_lock */
+               raw_spin_lock(&old_owner->pi_lock);
+               WARN_ON(list_empty(&pi_state->list));
+               list_del_init(&pi_state->list);
+               raw_spin_unlock(&old_owner->pi_lock);
+       }
+
+       if (new_owner) {        /* ->owner is left untouched when NULL */
+               raw_spin_lock(&new_owner->pi_lock);
+               WARN_ON(!list_empty(&pi_state->list));
+               list_add(&pi_state->list, &new_owner->pi_state_list);
+               pi_state->owner = new_owner;
+               raw_spin_unlock(&new_owner->pi_lock);
+       }
+}
+
 static void get_pi_state(struct futex_pi_state *pi_state)
 {
        WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount));
@@ -855,17 +884,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
         * and has cleaned up the pi_state already
         */
        if (pi_state->owner) {
-               struct task_struct *owner;
                unsigned long flags;
 
                raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
-               owner = pi_state->owner;
-               if (owner) {
-                       raw_spin_lock(&owner->pi_lock);
-                       list_del_init(&pi_state->list);
-                       raw_spin_unlock(&owner->pi_lock);
-               }
-               rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
+               pi_state_update_owner(pi_state, NULL);
+               rt_mutex_proxy_unlock(&pi_state->pi_mutex);
                raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, 
flags);
        }
 
@@ -890,7 +913,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
  * Kernel cleans up PI-state, but userspace is likely hosed.
  * (Robust-futex cleanup is separate and might save the day for userspace.)
  */
-void exit_pi_state_list(struct task_struct *curr)
+static void exit_pi_state_list(struct task_struct *curr)
 {
        struct list_head *next, *head = &curr->pi_state_list;
        struct futex_pi_state *pi_state;
@@ -960,7 +983,8 @@ void exit_pi_state_list(struct task_struct *curr)
        }
        raw_spin_unlock_irq(&curr->pi_lock);
 }
-
+#else
+static inline void exit_pi_state_list(struct task_struct *curr) { }
 #endif
 
 /*
@@ -1010,7 +1034,8 @@ void exit_pi_state_list(struct task_struct *curr)
  *     FUTEX_OWNER_DIED bit. See [4]
  *
  * [10] There is no transient state which leaves owner and user space
- *     TID out of sync.
+ *     TID out of sync. Except one error case where the kernel is denied
+ *     write access to the user address, see fixup_pi_state_owner().
  *
  *
  * Serialization and lifetime rules:
@@ -1169,16 +1194,47 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 
uval,
        return ret;
 }
 
+/**
+ * wait_for_owner_exiting - Block until the owner has exited
+ * @ret:       Status code from the caller; the wait happens only for -EBUSY
+ * @exiting:   Pointer to the exiting task
+ *             Caller must hold a refcount on @exiting.
+ */
+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
+{
+       if (ret != -EBUSY) {
+               WARN_ON_ONCE(exiting);
+               return;
+       }
+
+       if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
+               return;
+
+       mutex_lock(&exiting->futex_exit_mutex);
+       /*
+        * No point in doing state checking here. If the waiter got here
+        * while the task was in exec()->futex_exec_release() then it can
+        * have any FUTEX_STATE_* value when the waiter has acquired the
+        * mutex. OK, if running, EXITING or DEAD if it reached exit()
+        * already. Highly unlikely and not a problem. Just one more round
+        * through the futex maze.
+        */
+       mutex_unlock(&exiting->futex_exit_mutex);
+
+       put_task_struct(exiting);
+}
+
 static int handle_exit_race(u32 __user *uaddr, u32 uval,
                            struct task_struct *tsk)
 {
        u32 uval2;
 
        /*
-        * If PF_EXITPIDONE is not yet set, then try again.
+        * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+        * caller that the alleged owner is busy.
         */
-       if (tsk && !(tsk->flags & PF_EXITPIDONE))
-               return -EAGAIN;
+       if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
+               return -EBUSY;
 
        /*
         * Reread the user space value to handle the following situation:
@@ -1196,8 +1252,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
         *    *uaddr = 0xC0000000;           tsk = get_task(PID);
         *   }                               if (!tsk->flags & PF_EXITING) {
         *  ...                                attach();
-        *  tsk->flags |= PF_EXITPIDONE;     } else {
-        *                                     if (!(tsk->flags & 
PF_EXITPIDONE))
+        *  tsk->futex_state =               } else {
+        *      FUTEX_STATE_DEAD;              if (tsk->futex_state !=
+        *                                        FUTEX_STATE_DEAD)
         *                                       return -EAGAIN;
         *                                     return -ESRCH; <--- FAIL
         *                                   }
@@ -1228,7 +1285,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
  * it after doing proper sanity checks.
  */
 static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key 
*key,
-                             struct futex_pi_state **ps)
+                             struct futex_pi_state **ps,
+                             struct task_struct **exiting)
 {
        pid_t pid = uval & FUTEX_TID_MASK;
        struct futex_pi_state *pi_state;
@@ -1253,22 +1311,33 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 
uval, union futex_key *key,
        }
 
        /*
-        * We need to look at the task state flags to figure out,
-        * whether the task is exiting. To protect against the do_exit
-        * change of the task flags, we do this protected by
-        * p->pi_lock:
+        * We need to look at the task state to figure out, whether the
+        * task is exiting. To protect against the change of the task state
+        * in futex_exit_release(), we do this protected by p->pi_lock:
         */
        raw_spin_lock_irq(&p->pi_lock);
-       if (unlikely(p->flags & PF_EXITING)) {
+       if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
                /*
-                * The task is on the way out. When PF_EXITPIDONE is
-                * set, we know that the task has finished the
-                * cleanup:
+                * The task is on the way out. When the futex state is
+                * FUTEX_STATE_DEAD, we know that the task has finished
+                * the cleanup:
                 */
                int ret = handle_exit_race(uaddr, uval, p);
 
                raw_spin_unlock_irq(&p->pi_lock);
-               put_task_struct(p);
+               /*
+                * If the owner task is between FUTEX_STATE_EXITING and
+                * FUTEX_STATE_DEAD then store the task pointer and keep
+                * the reference on the task struct. The calling code will
+                * drop all locks, wait for the task to reach
+                * FUTEX_STATE_DEAD and then drop the refcount. This is
+                * required to prevent a live lock when the current task
+                * preempted the exiting task between the two states.
+                */
+               if (ret == -EBUSY)
+                       *exiting = p;
+               else
+                       put_task_struct(p);
                return ret;
        }
 
@@ -1307,7 +1376,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 
uval, union futex_key *key,
 
 static int lookup_pi_state(u32 __user *uaddr, u32 uval,
                           struct futex_hash_bucket *hb,
-                          union futex_key *key, struct futex_pi_state **ps)
+                          union futex_key *key, struct futex_pi_state **ps,
+                          struct task_struct **exiting)
 {
        struct futex_q *top_waiter = futex_top_waiter(hb, key);
 
@@ -1322,7 +1392,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
         * We are the first waiter - try to look up the owner based on
         * @uval and attach to it.
         */
-       return attach_to_pi_owner(uaddr, uval, key, ps);
+       return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
 }
 
 static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1350,6 +1420,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 
uval, u32 newval)
  *                     lookup
  * @task:              the task to perform the atomic lock work for.  This will
  *                     be "current" except in the case of requeue pi.
+ * @exiting:           Pointer to store the task pointer of the owner task
+ *                     which is in the middle of exiting
  * @set_waiters:       force setting the FUTEX_WAITERS bit (1) or not (0)
  *
  * Return:
@@ -1358,11 +1430,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 
uval, u32 newval)
  *  - <0 - error
  *
  * The hb->lock and futex_key refs shall be held by the caller.
+ *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
  */
 static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
                                union futex_key *key,
                                struct futex_pi_state **ps,
-                               struct task_struct *task, int set_waiters)
+                               struct task_struct *task,
+                               struct task_struct **exiting,
+                               int set_waiters)
 {
        u32 uval, newval, vpid = task_pid_vnr(task);
        struct futex_q *top_waiter;
@@ -1432,7 +1510,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct 
futex_hash_bucket *hb,
         * attach to the owner. If that fails, no harm done, we only
         * set the FUTEX_WAITERS bit in the user space variable.
         */
-       return attach_to_pi_owner(uaddr, newval, key, ps);
+       return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
 }
 
 /**
@@ -1537,26 +1615,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, 
struct futex_pi_state *pi_
                        ret = -EINVAL;
        }
 
-       if (ret)
-               goto out_unlock;
-
-       /*
-        * This is a point of no return; once we modify the uval there is no
-        * going back and subsequent operations must not fail.
-        */
-
-       raw_spin_lock(&pi_state->owner->pi_lock);
-       WARN_ON(list_empty(&pi_state->list));
-       list_del_init(&pi_state->list);
-       raw_spin_unlock(&pi_state->owner->pi_lock);
-
-       raw_spin_lock(&new_owner->pi_lock);
-       WARN_ON(!list_empty(&pi_state->list));
-       list_add(&pi_state->list, &new_owner->pi_state_list);
-       pi_state->owner = new_owner;
-       raw_spin_unlock(&new_owner->pi_lock);
-
-       postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+       if (!ret) {
+               /*
+                * This is a point of no return; once we modified the uval
+                * there is no going back and subsequent operations must
+                * not fail.
+                */
+               pi_state_update_owner(pi_state, new_owner);
+               postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
+       }
 
 out_unlock:
        raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
@@ -1853,6 +1920,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union 
futex_key *key,
  * @key1:              the from futex key
  * @key2:              the to futex key
  * @ps:                        address to store the pi_state pointer
+ * @exiting:           Pointer to store the task pointer of the owner task
+ *                     which is in the middle of exiting
  * @set_waiters:       force setting the FUTEX_WAITERS bit (1) or not (0)
  *
  * Try and get the lock on behalf of the top waiter if we can do it atomically.
@@ -1860,16 +1929,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union 
futex_key *key,
  * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
  * hb1 and hb2 must be held by the caller.
  *
+ * @exiting is only set when the return value is -EBUSY. If so, this holds
+ * a refcount on the exiting task on return and the caller needs to drop it
+ * after waiting for the exit to complete.
+ *
  * Return:
  *  -  0 - failed to acquire the lock atomically;
  *  - >0 - acquired the lock, return value is vpid of the top_waiter
  *  - <0 - error
  */
-static int futex_proxy_trylock_atomic(u32 __user *pifutex,
-                                struct futex_hash_bucket *hb1,
-                                struct futex_hash_bucket *hb2,
-                                union futex_key *key1, union futex_key *key2,
-                                struct futex_pi_state **ps, int set_waiters)
+static int
+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
+                          struct futex_hash_bucket *hb2, union futex_key *key1,
+                          union futex_key *key2, struct futex_pi_state **ps,
+                          struct task_struct **exiting, int set_waiters)
 {
        struct futex_q *top_waiter = NULL;
        u32 curval;
@@ -1906,7 +1979,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
         */
        vpid = task_pid_vnr(top_waiter->task);
        ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-                                  set_waiters);
+                                  exiting, set_waiters);
        if (ret == 1) {
                requeue_pi_wake_futex(top_waiter, key2, hb2);
                return vpid;
@@ -2035,6 +2108,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int 
flags,
        }
 
        if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+               struct task_struct *exiting = NULL;
+
                /*
                 * Attempt to acquire uaddr2 and wake the top waiter. If we
                 * intend to requeue waiters, force setting the FUTEX_WAITERS
@@ -2042,7 +2117,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int 
flags,
                 * faults rather in the requeue loop below.
                 */
                ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-                                                &key2, &pi_state, nr_requeue);
+                                                &key2, &pi_state,
+                                                &exiting, nr_requeue);
 
                /*
                 * At this point the top_waiter has either taken uaddr2 or is
@@ -2069,7 +2145,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int 
flags,
                         * If that call succeeds then we have pi_state and an
                         * initial refcount on it.
                         */
-                       ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state);
+                       ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
+                                             &pi_state, &exiting);
                }
 
                switch (ret) {
@@ -2087,17 +2164,24 @@ static int futex_requeue(u32 __user *uaddr1, unsigned 
int flags,
                        if (!ret)
                                goto retry;
                        goto out;
+               case -EBUSY:
                case -EAGAIN:
                        /*
                         * Two reasons for this:
-                        * - Owner is exiting and we just wait for the
+                        * - EBUSY: Owner is exiting and we just wait for the
                         *   exit to complete.
-                        * - The user space value changed.
+                        * - EAGAIN: The user space value changed.
                         */
                        double_unlock_hb(hb1, hb2);
                        hb_waiters_dec(hb2);
                        put_futex_key(&key2);
                        put_futex_key(&key1);
+                       /*
+                        * Handle the case where the owner is in the middle of
+                        * exiting. Wait for the exit to complete otherwise
+                        * this task might loop forever, aka. live lock.
+                        */
+                       wait_for_owner_exiting(ret, exiting);
                        cond_resched();
                        goto retry;
                default:
@@ -2362,18 +2446,13 @@ static void unqueue_me_pi(struct futex_q *q)
        spin_unlock(q->lock_ptr);
 }
 
-static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-                               struct task_struct *argowner)
+static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+                                 struct task_struct *argowner)
 {
+       u32 uval, uninitialized_var(curval), newval, newtid;
        struct futex_pi_state *pi_state = q->pi_state;
-       u32 uval, uninitialized_var(curval), newval;
        struct task_struct *oldowner, *newowner;
-       u32 newtid;
-       int ret, err = 0;
-
-       lockdep_assert_held(q->lock_ptr);
-
-       raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+       int err = 0;
 
        oldowner = pi_state->owner;
 
@@ -2407,14 +2486,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, 
struct futex_q *q,
                         * We raced against a concurrent self; things are
                         * already fixed up. Nothing to do.
                         */
-                       ret = 0;
-                       goto out_unlock;
+                       return 0;
                }
 
                if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
-                       /* We got the lock after all, nothing to fix. */
-                       ret = 0;
-                       goto out_unlock;
+                       /* We got the lock. pi_state is correct. Tell caller. */
+                       return 1;
                }
 
                /*
@@ -2441,8 +2518,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct 
futex_q *q,
                         * We raced against a concurrent self; things are
                         * already fixed up. Nothing to do.
                         */
-                       ret = 0;
-                       goto out_unlock;
+                       return 1;
                }
                newowner = argowner;
        }
@@ -2472,22 +2548,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, 
struct futex_q *q,
         * We fixed up user space. Now we need to fix the pi_state
         * itself.
         */
-       if (pi_state->owner != NULL) {
-               raw_spin_lock(&pi_state->owner->pi_lock);
-               WARN_ON(list_empty(&pi_state->list));
-               list_del_init(&pi_state->list);
-               raw_spin_unlock(&pi_state->owner->pi_lock);
-       }
+       pi_state_update_owner(pi_state, newowner);
 
-       pi_state->owner = newowner;
-
-       raw_spin_lock(&newowner->pi_lock);
-       WARN_ON(!list_empty(&pi_state->list));
-       list_add(&pi_state->list, &newowner->pi_state_list);
-       raw_spin_unlock(&newowner->pi_lock);
-       raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-
-       return 0;
+       return argowner == current;
 
        /*
         * In order to reschedule or handle a page fault, we need to drop the
@@ -2508,17 +2571,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, 
struct futex_q *q,
 
        switch (err) {
        case -EFAULT:
-               ret = fault_in_user_writeable(uaddr);
+               err = fault_in_user_writeable(uaddr);
                break;
 
        case -EAGAIN:
                cond_resched();
-               ret = 0;
+               err = 0;
                break;
 
        default:
                WARN_ON_ONCE(1);
-               ret = err;
                break;
        }
 
@@ -2528,17 +2590,44 @@ static int fixup_pi_state_owner(u32 __user *uaddr, 
struct futex_q *q,
        /*
         * Check if someone else fixed it for us:
         */
-       if (pi_state->owner != oldowner) {
-               ret = 0;
-               goto out_unlock;
-       }
+       if (pi_state->owner != oldowner)
+               return argowner == current;
 
-       if (ret)
-               goto out_unlock;
+       /* Retry if err was -EAGAIN or the fault in succeeded */
+       if (!err)
+               goto retry;
 
-       goto retry;
+       /*
+        * fault_in_user_writeable() failed so user state is immutable. At
+        * best we can make the kernel state consistent but user state will
+        * be most likely hosed and any subsequent unlock operation will be
+        * rejected due to PI futex rule [10].
+        *
+        * Ensure that the rtmutex owner is also the pi_state owner despite
+        * the user space value claiming something different. There is no
+        * point in unlocking the rtmutex if current is the owner as it
+        * would need to wait until the next waiter has taken the rtmutex
+        * to guarantee consistent state. Keep it simple. Userspace asked
+        * for this wrecked state.
+        *
+        * The rtmutex has an owner - either current or some other
+        * task. See the EAGAIN loop above.
+        */
+       pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
 
-out_unlock:
+       return err;
+}
+
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
+                               struct task_struct *argowner)
+{
+       struct futex_pi_state *pi_state = q->pi_state;
+       int ret;
+
+       lockdep_assert_held(q->lock_ptr);
+
+       raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+       ret = __fixup_pi_state_owner(uaddr, q, argowner);
        raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
        return ret;
 }
@@ -2562,8 +2651,6 @@ static long futex_wait_restart(struct restart_block 
*restart);
  */
 static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 {
-       int ret = 0;
-
        if (locked) {
                /*
                 * Got the lock. We might not be the anticipated owner if we
@@ -2574,8 +2661,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q 
*q, int locked)
                 * stable state, anything else needs more attention.
                 */
                if (q->pi_state->owner != current)
-                       ret = fixup_pi_state_owner(uaddr, q, current);
-               goto out;
+                       return fixup_pi_state_owner(uaddr, q, current);
+               return 1;
        }
 
        /*
@@ -2586,24 +2673,17 @@ static int fixup_owner(u32 __user *uaddr, struct 
futex_q *q, int locked)
         * Another speculative read; pi_state->owner == current is unstable
         * but needs our attention.
         */
-       if (q->pi_state->owner == current) {
-               ret = fixup_pi_state_owner(uaddr, q, NULL);
-               goto out;
-       }
+       if (q->pi_state->owner == current)
+               return fixup_pi_state_owner(uaddr, q, NULL);
 
        /*
         * Paranoia check. If we did not take the lock, then we should not be
-        * the owner of the rt_mutex.
+        * the owner of the rt_mutex. Warn and establish consistent state.
         */
-       if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
-               printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
-                               "pi-state %p\n", ret,
-                               q->pi_state->pi_mutex.owner,
-                               q->pi_state->owner);
-       }
+       if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
+               return fixup_pi_state_owner(uaddr, q, current);
 
-out:
-       return ret ? ret : locked;
+       return 0;
 }
 
 /**
@@ -2824,7 +2904,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int 
flags,
                         ktime_t *time, int trylock)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
-       struct futex_pi_state *pi_state = NULL;
+       struct task_struct *exiting = NULL;
        struct rt_mutex_waiter rt_waiter;
        struct futex_hash_bucket *hb;
        struct futex_q q = futex_q_init;
@@ -2852,7 +2932,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int 
flags,
 retry_private:
        hb = queue_lock(&q);
 
-       ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
+       ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
+                                  &exiting, 0);
        if (unlikely(ret)) {
                /*
                 * Atomic work succeeded and we got the lock,
@@ -2865,15 +2946,22 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned 
int flags,
                        goto out_unlock_put_key;
                case -EFAULT:
                        goto uaddr_faulted;
+               case -EBUSY:
                case -EAGAIN:
                        /*
                         * Two reasons for this:
-                        * - Task is exiting and we just wait for the
+                        * - EBUSY: Task is exiting and we just wait for the
                         *   exit to complete.
-                        * - The user space value changed.
+                        * - EAGAIN: The user space value changed.
                         */
                        queue_unlock(hb);
                        put_futex_key(&q.key);
+                       /*
+                        * Handle the case where the owner is in the middle of
+                        * exiting. Wait for the exit to complete otherwise
+                        * this task might loop forever, aka. live lock.
+                        */
+                       wait_for_owner_exiting(ret, exiting);
                        cond_resched();
                        goto retry;
                default:
@@ -2958,23 +3046,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int 
flags,
        if (res)
                ret = (res < 0) ? res : 0;
 
-       /*
-        * If fixup_owner() faulted and was unable to handle the fault, unlock
-        * it and return the fault to userspace.
-        */
-       if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
-               pi_state = q.pi_state;
-               get_pi_state(pi_state);
-       }
-
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
 
-       if (pi_state) {
-               rt_mutex_futex_unlock(&pi_state->pi_mutex);
-               put_pi_state(pi_state);
-       }
-
        goto out_put_key;
 
 out_unlock_put_key:
@@ -3240,7 +3314,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, 
unsigned int flags,
                                 u32 __user *uaddr2)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
-       struct futex_pi_state *pi_state = NULL;
        struct rt_mutex_waiter rt_waiter;
        struct futex_hash_bucket *hb;
        union futex_key key2 = FUTEX_KEY_INIT;
@@ -3325,16 +3398,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, 
unsigned int flags,
                if (q.pi_state && (q.pi_state->owner != current)) {
                        spin_lock(q.lock_ptr);
                        ret = fixup_pi_state_owner(uaddr2, &q, current);
-                       if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
-                               pi_state = q.pi_state;
-                               get_pi_state(pi_state);
-                       }
                        /*
                         * Drop the reference to the pi state which
                         * the requeue_pi() code acquired for us.
                         */
                        put_pi_state(q.pi_state);
                        spin_unlock(q.lock_ptr);
+                       /*
+                        * Adjust the return value. It's either -EFAULT or
+                        * success (1) but the caller expects 0 for success.
+                        */
+                       ret = ret < 0 ? ret : 0;
                }
        } else {
                struct rt_mutex *pi_mutex;
@@ -3365,25 +3439,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, 
unsigned int flags,
                if (res)
                        ret = (res < 0) ? res : 0;
 
-               /*
-                * If fixup_pi_state_owner() faulted and was unable to handle
-                * the fault, unlock the rt_mutex and return the fault to
-                * userspace.
-                */
-               if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
-                       pi_state = q.pi_state;
-                       get_pi_state(pi_state);
-               }
-
                /* Unqueue and drop the lock. */
                unqueue_me_pi(&q);
        }
 
-       if (pi_state) {
-               rt_mutex_futex_unlock(&pi_state->pi_mutex);
-               put_pi_state(pi_state);
-       }
-
        if (ret == -EINTR) {
                /*
                 * We've already been requeued, but cannot restart by calling
@@ -3625,7 +3684,7 @@ static inline int fetch_robust_entry(struct robust_list 
__user **entry,
  *
  * We silently return on any sign of list-walking problem.
  */
-void exit_robust_list(struct task_struct *curr)
+static void exit_robust_list(struct task_struct *curr)
 {
        struct robust_list_head __user *head = curr->robust_list;
        struct robust_list __user *entry, *next_entry, *pending;
@@ -3690,6 +3749,114 @@ void exit_robust_list(struct task_struct *curr)
        }
 }
 
+static void futex_cleanup(struct task_struct *tsk)
+{
+       if (unlikely(tsk->robust_list)) {
+               exit_robust_list(tsk);
+               tsk->robust_list = NULL;
+       }
+
+#ifdef CONFIG_COMPAT
+       if (unlikely(tsk->compat_robust_list)) {
+               compat_exit_robust_list(tsk);
+               tsk->compat_robust_list = NULL;
+       }
+#endif
+
+       if (unlikely(!list_empty(&tsk->pi_state_list)))
+               exit_pi_state_list(tsk);
+}
+
+/**
+ * futex_exit_recursive - Set the task's futex state to FUTEX_STATE_DEAD
+ * @tsk:       task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This is called from the recursive fault handling path in do_exit().
+ *
+ * This is best effort. Either the futex exit code has run already or
+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
+ * take it over. If not, the problem is pushed back to user space. If the
+ * futex exit code did not run yet, then an already queued waiter might
+ * block forever, but there is nothing which can be done about that.
+ */
+void futex_exit_recursive(struct task_struct *tsk)
+{
+       /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
+       if (tsk->futex_state == FUTEX_STATE_EXITING)
+               mutex_unlock(&tsk->futex_exit_mutex);
+       tsk->futex_state = FUTEX_STATE_DEAD;
+}
+
+static void futex_cleanup_begin(struct task_struct *tsk)
+{
+       /*
+        * Prevent various race issues against a concurrent incoming waiter
+        * including live locks by forcing the waiter to block on
+        * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
+        * attach_to_pi_owner().
+        */
+       mutex_lock(&tsk->futex_exit_mutex);
+
+       /*
+        * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+        *
+        * This ensures that all subsequent checks of tsk->futex_state in
+        * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
+        * tsk->pi_lock held.
+        *
+        * It guarantees also that a pi_state which was queued right before
+        * the state change under tsk->pi_lock by a concurrent waiter must
+        * be observed in exit_pi_state_list().
+        */
+       raw_spin_lock_irq(&tsk->pi_lock);
+       tsk->futex_state = FUTEX_STATE_EXITING;
+       raw_spin_unlock_irq(&tsk->pi_lock);
+}
+
+static void futex_cleanup_end(struct task_struct *tsk, int state)
+{
+       /*
+        * Lockless store. The only side effect is that an observer might
+        * take another loop until it becomes visible.
+        */
+       tsk->futex_state = state;
+       /*
+        * Drop the exit protection. This unblocks waiters which observed
+        * FUTEX_STATE_EXITING to reevaluate the state.
+        */
+       mutex_unlock(&tsk->futex_exit_mutex);
+}
+
+void futex_exec_release(struct task_struct *tsk)
+{
+       /*
+        * The state handling is done for consistency, but in the case of
+        * exec() there is no way to prevent further damage as the PID stays
+        * the same. But for the unlikely and arguably buggy case that a
+        * futex is held on exec(), this provides at least as much state
+        * consistency protection as is possible.
+        */
+       futex_cleanup_begin(tsk);
+       futex_cleanup(tsk);
+       /*
+        * Reset the state to FUTEX_STATE_OK. The task is alive and about
+        * to exec a new binary.
+        */
+       futex_cleanup_end(tsk, FUTEX_STATE_OK);
+}
+
+void futex_exit_release(struct task_struct *tsk)
+{
+       futex_cleanup_begin(tsk);
+       futex_cleanup(tsk);
+       futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
+}
+
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                u32 __user *uaddr2, u32 val2, u32 val3)
 {
@@ -3817,7 +3984,7 @@ static void __user *futex_uaddr(struct robust_list __user 
*entry,
  *
  * We silently return on any sign of list-walking problem.
  */
-void compat_exit_robust_list(struct task_struct *curr)
+static void compat_exit_robust_list(struct task_struct *curr)
 {
        struct compat_robust_list_head __user *head = curr->compat_robust_list;
        struct robust_list __user *entry, *next_entry, *pending;
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 9562aaa2afdc..a5ec4f68527e 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1719,8 +1719,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
  * possible because it belongs to the pi_state which is about to be freed
  * and it is not longer visible to other tasks.
  */
-void rt_mutex_proxy_unlock(struct rt_mutex *lock,
-                          struct task_struct *proxy_owner)
+void rt_mutex_proxy_unlock(struct rt_mutex *lock)
 {
        debug_rt_mutex_proxy_unlock(lock);
        rt_mutex_set_owner(lock, NULL);
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index d1d62f942be2..ca6fb489007b 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
 extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
 extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
                                       struct task_struct *proxy_owner);
-extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
-                                 struct task_struct *proxy_owner);
+extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
 extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
 extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
                                     struct rt_mutex_waiter *waiter,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 87ce9736043d..360129e47540 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4393,6 +4393,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, 
int cpu)
 
        if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return;
+       /* prevent another thread from changing buffer sizes */
+       mutex_lock(&buffer->mutex);
 
        atomic_inc(&buffer->resize_disabled);
        atomic_inc(&cpu_buffer->record_disabled);
@@ -4416,6 +4418,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, 
int cpu)
 
        atomic_dec(&cpu_buffer->record_disabled);
        atomic_dec(&buffer->resize_disabled);
+
+       mutex_unlock(&buffer->mutex);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
diff --git a/mm/slub.c b/mm/slub.c
index 02295fa61583..eac80b0516fe 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5766,10 +5766,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
 
        s->kobj.kset = kset;
        err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
-       if (err) {
-               kobject_put(&s->kobj);
+       if (err)
                goto out;
-       }
 
        err = sysfs_create_group(&s->kobj, &slab_attr_group);
        if (err)
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 727050c40f09..8a55378e8b7c 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -15,10 +15,6 @@ endef
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
-HOSTCC ?= gcc
-HOSTLD ?= ld
-HOSTAR ?= ar
-
 export HOSTCC HOSTLD HOSTAR
 
 ifeq ($(V),1)
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index baa92279c137..15f32f67cf34 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -7,15 +7,6 @@ ARCH := x86
 endif
 
 # always use the host compiler
-ifneq ($(LLVM),)
-HOSTAR ?= llvm-ar
-HOSTCC ?= clang
-HOSTLD ?= ld.lld
-else
-HOSTAR ?= ar
-HOSTCC ?= gcc
-HOSTLD ?= ld
-endif
 AR      = $(HOSTAR)
 CC      = $(HOSTCC)
 LD      = $(HOSTLD)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0be411695379..678aa7feb84d 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -148,10 +148,6 @@ endef
 
 LD += $(EXTRA_LDFLAGS)
 
-HOSTCC  ?= gcc
-HOSTLD  ?= ld
-HOSTAR  ?= ar
-
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
 
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
index fc116c060b98..32ff7baf39df 100644
--- a/tools/power/acpi/Makefile.config
+++ b/tools/power/acpi/Makefile.config
@@ -57,7 +57,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
 CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
 CROSS_COMPILE ?= $(CROSS)
 LD = $(CC)
-HOSTCC = gcc
 
 # check if compiler option is supported
 cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 8fc6b1ca47dc..42dbe05b1807 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -60,6 +60,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
 $(call allow-override,CXX,$(CROSS_COMPILE)g++)
 $(call allow-override,STRIP,$(CROSS_COMPILE)strip)
 
+ifneq ($(LLVM),)
+HOSTAR  ?= llvm-ar
+HOSTCC  ?= clang
+HOSTLD  ?= ld.lld
+else
+HOSTAR  ?= ar
+HOSTCC  ?= gcc
+HOSTLD  ?= ld
+endif
+
 ifeq ($(CC_NO_CLANG), 1)
 EXTRA_WARNINGS += -Wstrict-aliasing=3
 endif
  • Linux 4.19.172 Greg Kroah-Hartman
    • Re: Linux 4.19.172 Greg Kroah-Hartman

Reply via email to