diff --git a/Makefile b/Makefile
index 44960184701a..1523557bd61f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 20
+SUBLEVEL = 21
 EXTRAVERSION =
 NAME = Roaring Lionus
 
diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
index ae4b3880616d..4616452ce74d 100644
--- a/arch/arm/boot/dts/bcm5301x.dtsi
+++ b/arch/arm/boot/dts/bcm5301x.dtsi
@@ -66,14 +66,14 @@
                timer@20200 {
                        compatible = "arm,cortex-a9-global-timer";
                        reg = <0x20200 0x100>;
-                       interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
                        clocks = <&periph_clk>;
                };
 
                local-timer@20600 {
                        compatible = "arm,cortex-a9-twd-timer";
                        reg = <0x20600 0x100>;
-                       interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
                        clocks = <&periph_clk>;
                };
 
diff --git a/arch/arm/mach-bcm/bcm_5301x.c b/arch/arm/mach-bcm/bcm_5301x.c
index c8830a2b0d60..fe067f6cebb6 100644
--- a/arch/arm/mach-bcm/bcm_5301x.c
+++ b/arch/arm/mach-bcm/bcm_5301x.c
@@ -9,14 +9,42 @@
 #include <asm/hardware/cache-l2x0.h>
 
 #include <asm/mach/arch.h>
+#include <asm/siginfo.h>
+#include <asm/signal.h>
+
+#define FSR_EXTERNAL           (1 << 12)
+#define FSR_READ               (0 << 10)
+#define FSR_IMPRECISE          0x0406
 
 static const char *const bcm5301x_dt_compat[] __initconst = {
        "brcm,bcm4708",
        NULL,
 };
 
+static int bcm5301x_abort_handler(unsigned long addr, unsigned int fsr,
+                                 struct pt_regs *regs)
+{
+       /*
+        * We want to ignore aborts forwarded from the PCIe bus that are
+        * expected and shouldn't really be passed on by the PCIe controller.
+        * The drawback is that the same FSR code may be reported when reading
+        * a non-existent APB register, and we shouldn't ignore that.
+        */
+       if (fsr == (FSR_EXTERNAL | FSR_READ | FSR_IMPRECISE))
+               return 0;
+
+       return 1;
+}
+
+static void __init bcm5301x_init_early(void)
+{
+       hook_fault_code(16 + 6, bcm5301x_abort_handler, SIGBUS, BUS_OBJERR,
+                       "imprecise external abort");
+}
+
 DT_MACHINE_START(BCM5301X, "BCM5301X")
        .l2c_aux_val    = 0,
        .l2c_aux_mask   = ~0,
        .dt_compat      = bcm5301x_dt_compat,
+       .init_early     = bcm5301x_init_early,
 MACHINE_END
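
For context, the handler above swallows exactly one fault-status value. A minimal stand-alone C sketch (not kernel code; only the three macro values come from the hunk, the rest is scaffolding) showing the composite FSR value being matched and the 0/1 handled/propagate convention:

    /* Stand-alone sketch of the FSR match in bcm5301x_abort_handler().
     * Only the three macro values are taken from the patch. */
    #include <stdio.h>

    #define FSR_EXTERNAL   (1 << 12)
    #define FSR_READ       (0 << 10)
    #define FSR_IMPRECISE  0x0406

    /* 0 = abort handled (ignored), 1 = propagate to the kernel */
    static int abort_handler(unsigned int fsr)
    {
        if (fsr == (FSR_EXTERNAL | FSR_READ | FSR_IMPRECISE))
            return 0;
        return 1;
    }

    int main(void)
    {
        printf("matched FSR: 0x%04x\n",
               FSR_EXTERNAL | FSR_READ | FSR_IMPRECISE); /* 0x1406 */
        printf("imprecise external read abort -> %d\n", abort_handler(0x1406));
        printf("other abort                   -> %d\n", abort_handler(0x0406));
        return 0;
    }
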
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 8ac0e5994ed2..0ddf3698b85d 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void)
 DEFINE_HWx_IRQDISPATCH(5)
 #endif
 
+static void ltq_hw_irq_handler(struct irq_desc *desc)
+{
+       ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
+}
+
 #ifdef CONFIG_MIPS_MT_SMP
 void __init arch_init_ipiirq(int irq, struct irqaction *action)
 {
@@ -313,23 +318,19 @@ static struct irqaction irq_call = {
 asmlinkage void plat_irq_dispatch(void)
 {
        unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
-       unsigned int i;
-
-       if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) {
-               do_IRQ(MIPS_CPU_TIMER_IRQ);
-               goto out;
-       } else {
-               for (i = 0; i < MAX_IM; i++) {
-                       if (pending & (CAUSEF_IP2 << i)) {
-                               ltq_hw_irqdispatch(i);
-                               goto out;
-                       }
-               }
+       int irq;
+
+       if (!pending) {
+               spurious_interrupt();
+               return;
        }
-       pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status());
 
-out:
-       return;
+       pending >>= CAUSEB_IP;
+       while (pending) {
+               irq = fls(pending) - 1;
+               do_IRQ(MIPS_CPU_IRQ_BASE + irq);
+               pending &= ~BIT(irq);
+       }
 }
 
 static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
@@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = {
        .map = icu_map,
 };
 
-static struct irqaction cascade = {
-       .handler = no_action,
-       .name = "cascade",
-};
-
 int __init icu_of_init(struct device_node *node, struct device_node *parent)
 {
        struct device_node *eiu_node;
@@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
        mips_cpu_irq_init();
 
        for (i = 0; i < MAX_IM; i++)
-               setup_irq(i + 2, &cascade);
+               irq_set_chained_handler(i + 2, ltq_hw_irq_handler);
 
        if (cpu_has_vint) {
                pr_info("Setting up vectored interrupts\n");
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 9a2aee1b90fc..7fcf5128996a 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -68,6 +68,15 @@ struct exception_table_entry {
        ".previous\n"
 
 /*
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
+ * (with its lowest bit set) for which the fault handler in fixup_exception()
+ * loads -EFAULT into %r8 on a read or write fault, and additionally zeroes
+ * the target register on a read fault in get_user().
+ */
+#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
+       ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
+
+/*
  * The page fault handler stores, in a per-cpu area, the following information
  * if a fixup routine is available.
  */
@@ -94,7 +103,7 @@ struct exception_data {
 #define __get_user(x, ptr)                               \
 ({                                                       \
        register long __gu_err __asm__ ("r8") = 0;       \
-       register long __gu_val __asm__ ("r9") = 0;       \
+       register long __gu_val;                          \
                                                         \
        load_sr2();                                      \
        switch (sizeof(*(ptr))) {                        \
@@ -110,22 +119,23 @@ struct exception_data {
 })
 
 #define __get_user_asm(ldx, ptr)                        \
-       __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t"     \
-               ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\
+       __asm__("1: " ldx " 0(%%sr2,%2),%0\n"           \
+               "9:\n"                                  \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
                : "=r"(__gu_val), "=r"(__gu_err)        \
-               : "r"(ptr), "1"(__gu_err)               \
-               : "r1");
+               : "r"(ptr), "1"(__gu_err));
 
 #if !defined(CONFIG_64BIT)
 
 #define __get_user_asm64(ptr)                          \
-       __asm__("\n1:\tldw 0(%%sr2,%2),%0"              \
-               "\n2:\tldw 4(%%sr2,%2),%R0\n\t"         \
-               ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\
-               ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\
+       __asm__("   copy %%r0,%R0\n"                    \
+               "1: ldw 0(%%sr2,%2),%0\n"               \
+               "2: ldw 4(%%sr2,%2),%R0\n"              \
+               "9:\n"                                  \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
                : "=r"(__gu_val), "=r"(__gu_err)        \
-               : "r"(ptr), "1"(__gu_err)               \
-               : "r1");
+               : "r"(ptr), "1"(__gu_err));
 
 #endif /* !defined(CONFIG_64BIT) */
 
@@ -151,32 +161,31 @@ struct exception_data {
  * The "__put_user/kernel_asm()" macros tell gcc they read from memory
  * instead of writing. This is because they do not write to any memory
  * gcc knows about, so there are no aliasing issues. These macros must
- * also be aware that "fixup_put_user_skip_[12]" are executed in the
- * context of the fault, and any registers used there must be listed
- * as clobbers. In this case only "r1" is used by the current routines.
- * r8/r9 are already listed as err/val.
+ * also be aware that fixups are executed in the context of the fault,
+ * and any registers used there must be listed as clobbers.
+ * r8 is already listed as err.
  */
 
 #define __put_user_asm(stx, x, ptr)                         \
        __asm__ __volatile__ (                              \
-               "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t"         \
-               ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\
+               "1: " stx " %2,0(%%sr2,%1)\n"               \
+               "9:\n"                                      \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)     \
                : "=r"(__pu_err)                            \
-               : "r"(ptr), "r"(x), "0"(__pu_err)           \
-               : "r1")
+               : "r"(ptr), "r"(x), "0"(__pu_err))
 
 
 #if !defined(CONFIG_64BIT)
 
 #define __put_user_asm64(__val, ptr) do {                  \
        __asm__ __volatile__ (                              \
-               "\n1:\tstw %2,0(%%sr2,%1)"                  \
-               "\n2:\tstw %R2,4(%%sr2,%1)\n\t"             \
-               ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\
-               ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\
+               "1: stw %2,0(%%sr2,%1)\n"                   \
+               "2: stw %R2,4(%%sr2,%1)\n"                  \
+               "9:\n"                                      \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)     \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)     \
                : "=r"(__pu_err)                            \
-               : "r"(ptr), "r"(__val), "0"(__pu_err) \
-               : "r1");                                    \
+               : "r"(ptr), "r"(__val), "0"(__pu_err));     \
 } while (0)
 
 #endif /* !defined(CONFIG_64BIT) */
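
The "except_addr + 1" in ASM_EXCEPTIONTABLE_ENTRY_EFAULT() hides a flag in the low bit of the fixup address; fixup_exception() (see the arch/parisc/mm/fault.c hunk later in this patch) tests it with "fix->fixup & 1" and strips it with "&= ~3". A small sketch of that tagging idea, with hypothetical addresses:

    /* Sketch of low-bit tagging of a fixup address, mirroring the
     * parisc trick above; the addresses are made up for illustration. */
    #include <stdio.h>
    #include <stdint.h>

    struct extable_entry {
        uintptr_t insn;
        uintptr_t fixup; /* low bit set => "load -EFAULT" variant */
    };

    #define ENTRY_EFAULT(insn, fixup) { (insn), (fixup) + 1 }

    int main(void)
    {
        struct extable_entry e = ENTRY_EFAULT(0x1000, 0x2000);

        if (e.fixup & 1)
            printf("EFAULT-style fixup requested\n");
        /* instructions are word aligned, so the low two bits are spare */
        printf("resume at 0x%lx\n", (unsigned long)(e.fixup & ~(uintptr_t)3));
        return 0;
    }
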
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 3cad8aadc69e..4e6f0d93154f 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64);
 EXPORT_SYMBOL(lclear_user);
 EXPORT_SYMBOL(lstrnlen_user);
 
-/* Global fixups - defined as int to avoid creation of function pointers */
-extern int fixup_get_user_skip_1;
-extern int fixup_get_user_skip_2;
-extern int fixup_put_user_skip_1;
-extern int fixup_put_user_skip_2;
-EXPORT_SYMBOL(fixup_get_user_skip_1);
-EXPORT_SYMBOL(fixup_get_user_skip_2);
-EXPORT_SYMBOL(fixup_put_user_skip_1);
-EXPORT_SYMBOL(fixup_put_user_skip_2);
-
 #ifndef CONFIG_64BIT
 /* Needed so insmod can set dp value */
 extern int $global$;
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index e81afc378850..e7ffde2758fc 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -140,6 +140,8 @@ void machine_power_off(void)
        printk(KERN_EMERG "System shut down completed.\n"
               "Please power this system off now.");
 
+       /* prevent soft-lockup/stalled-CPU messages in the endless loop below. */
+       rcu_sysrq_start();
        for (;;);
 }
 
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
index 8fa92b8d839a..f2dac4d73b1b 100644
--- a/arch/parisc/lib/Makefile
+++ b/arch/parisc/lib/Makefile
@@ -2,7 +2,7 @@
 # Makefile for parisc-specific library files
 #
 
-lib-y  := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \
+lib-y  := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
           ucmpdi2.o delay.o
 
 obj-y  := iomap.o
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
deleted file mode 100644
index a5b72f22c7a6..000000000000
--- a/arch/parisc/lib/fixup.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Linux/PA-RISC Project (http://www.parisc-linux.org/)
- *
- *  Copyright (C) 2004  Randolph Chung <[email protected]>
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2, or (at your option)
- *    any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- * 
- * Fixup routines for kernel exception handling.
- */
-#include <asm/asm-offsets.h>
-#include <asm/assembly.h>
-#include <asm/errno.h>
-#include <linux/linkage.h>
-
-#ifdef CONFIG_SMP
-       .macro  get_fault_ip t1 t2
-       loadgp
-       addil LT%__per_cpu_offset,%r27
-       LDREG RT%__per_cpu_offset(%r1),\t1
-       /* t2 = smp_processor_id() */
-       mfctl 30,\t2
-       ldw TI_CPU(\t2),\t2
-#ifdef CONFIG_64BIT
-       extrd,u \t2,63,32,\t2
-#endif
-       /* t2 = &__per_cpu_offset[smp_processor_id()]; */
-       LDREGX \t2(\t1),\t2 
-       addil LT%exception_data,%r27
-       LDREG RT%exception_data(%r1),\t1
-       /* t1 = this_cpu_ptr(&exception_data) */
-       add,l \t1,\t2,\t1
-       /* %r27 = t1->fault_gp - restore gp */
-       LDREG EXCDATA_GP(\t1), %r27
-       /* t1 = t1->fault_ip */
-       LDREG EXCDATA_IP(\t1), \t1
-       .endm
-#else
-       .macro  get_fault_ip t1 t2
-       loadgp
-       /* t1 = this_cpu_ptr(&exception_data) */
-       addil LT%exception_data,%r27
-       LDREG RT%exception_data(%r1),\t2
-       /* %r27 = t2->fault_gp - restore gp */
-       LDREG EXCDATA_GP(\t2), %r27
-       /* t1 = t2->fault_ip */
-       LDREG EXCDATA_IP(\t2), \t1
-       .endm
-#endif
-
-       .level LEVEL
-
-       .text
-       .section .fixup, "ax"
-
-       /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
-ENTRY_CFI(fixup_get_user_skip_1)
-       get_fault_ip %r1,%r8
-       ldo 4(%r1), %r1
-       ldi -EFAULT, %r8
-       bv %r0(%r1)
-       copy %r0, %r9
-ENDPROC_CFI(fixup_get_user_skip_1)
-
-ENTRY_CFI(fixup_get_user_skip_2)
-       get_fault_ip %r1,%r8
-       ldo 8(%r1), %r1
-       ldi -EFAULT, %r8
-       bv %r0(%r1)
-       copy %r0, %r9
-ENDPROC_CFI(fixup_get_user_skip_2)
-
-       /* put_user() fixups, store -EFAULT in r8 */
-ENTRY_CFI(fixup_put_user_skip_1)
-       get_fault_ip %r1,%r8
-       ldo 4(%r1), %r1
-       bv %r0(%r1)
-       ldi -EFAULT, %r8
-ENDPROC_CFI(fixup_put_user_skip_1)
-
-ENTRY_CFI(fixup_put_user_skip_2)
-       get_fault_ip %r1,%r8
-       ldo 8(%r1), %r1
-       bv %r0(%r1)
-       ldi -EFAULT, %r8
-ENDPROC_CFI(fixup_put_user_skip_2)
-
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
index 56845de6b5df..f01188c044ee 100644
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -5,6 +5,8 @@
  *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
  *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
  *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
+ *    Copyright (C) 2017 Helge Deller <[email protected]>
+ *    Copyright (C) 2017 John David Anglin <[email protected]>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
@@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user)
 
        .procend
 
+
+
+/*
+ * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
+ *
+ * Inputs:
+ * - sr1 already contains space of source region
+ * - sr2 already contains space of destination region
+ *
+ * Returns:
+ * - number of bytes that could not be copied.
+ *   On success, this will be zero.
+ *
+ * This code is based on a C implementation of a copy routine written by
+ * Randolph Chung, which in turn was derived from glibc.
+ *
+ * Several strategies are used to get the best performance for various
+ * conditions. In the optimal case, we copy 32 or 16 bytes at a time in loops
+ * using general registers.  Unaligned copies are handled either by aligning
+ * the destination and then using a shift-and-write method, or in a few cases
+ * by falling back to a byte-at-a-time copy.
+ *
+ * Testing with various alignments and buffer sizes shows that this code is
+ * often >10x faster than a simple byte-at-a-time copy, even for strangely
+ * aligned operands. It is interesting to note that the glibc version of memcpy
+ * (written in C) is actually quite fast already. This routine is able to beat
+ * it by 30-40% for aligned copies because of the loop unrolling, but in some
+ * cases the glibc version is still slightly faster. This lends credibility
+ * to the idea that gcc can generate very good code as long as we are careful.
+ *
+ * Possible optimizations:
+ * - add cache prefetching
+ * - try not to use the post-increment address modifiers; they may create
+ *   additional interlocks. The assumption is that these were only efficient
+ *   on old machines (pre-PA8000 processors)
+ */
+
+       dst = arg0
+       src = arg1
+       len = arg2
+       end = arg3
+       t1  = r19
+       t2  = r20
+       t3  = r21
+       t4  = r22
+       srcspc = sr1
+       dstspc = sr2
+
+       t0 = r1
+       a1 = t1
+       a2 = t2
+       a3 = t3
+       a0 = t4
+
+       save_src = ret0
+       save_dst = ret1
+       save_len = r31
+
+ENTRY_CFI(pa_memcpy)
+       .proc
+       .callinfo NO_CALLS
+       .entry
+
+       /* Last destination address */
+       add     dst,len,end
+
+       /* short copy with less than 16 bytes? */
+       cmpib,>>=,n 15,len,.Lbyte_loop
+
+       /* same alignment? */
+       xor     src,dst,t0
+       extru   t0,31,2,t1
+       cmpib,<>,n  0,t1,.Lunaligned_copy
+
+#ifdef CONFIG_64BIT
+       /* only do 64-bit copies if we can get aligned. */
+       extru   t0,31,3,t1
+       cmpib,<>,n  0,t1,.Lalign_loop32
+
+       /* loop until we are 64-bit aligned */
+.Lalign_loop64:
+       extru   dst,31,3,t1
+       cmpib,=,n       0,t1,.Lcopy_loop_16
+20:    ldb,ma  1(srcspc,src),t1
+21:    stb,ma  t1,1(dstspc,dst)
+       b       .Lalign_loop64
+       ldo     -1(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+       ldi     31,t0
+.Lcopy_loop_16:
+       cmpb,COND(>>=),n t0,len,.Lword_loop
+
+10:    ldd     0(srcspc,src),t1
+11:    ldd     8(srcspc,src),t2
+       ldo     16(src),src
+12:    std,ma  t1,8(dstspc,dst)
+13:    std,ma  t2,8(dstspc,dst)
+14:    ldd     0(srcspc,src),t1
+15:    ldd     8(srcspc,src),t2
+       ldo     16(src),src
+16:    std,ma  t1,8(dstspc,dst)
+17:    std,ma  t2,8(dstspc,dst)
+
+       ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
+       ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
+       ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+       b       .Lcopy_loop_16
+       ldo     -32(len),len
+
+.Lword_loop:
+       cmpib,COND(>>=),n 3,len,.Lbyte_loop
+20:    ldw,ma  4(srcspc,src),t1
+21:    stw,ma  t1,4(dstspc,dst)
+       b       .Lword_loop
+       ldo     -4(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+#endif /* CONFIG_64BIT */
+
+       /* loop until we are 32-bit aligned */
+.Lalign_loop32:
+       extru   dst,31,2,t1
+       cmpib,=,n       0,t1,.Lcopy_loop_4
+20:    ldb,ma  1(srcspc,src),t1
+21:    stb,ma  t1,1(dstspc,dst)
+       b       .Lalign_loop32
+       ldo     -1(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+
+.Lcopy_loop_4:
+       cmpib,COND(>>=),n 15,len,.Lbyte_loop
+
+10:    ldw     0(srcspc,src),t1
+11:    ldw     4(srcspc,src),t2
+12:    stw,ma  t1,4(dstspc,dst)
+13:    stw,ma  t2,4(dstspc,dst)
+14:    ldw     8(srcspc,src),t1
+15:    ldw     12(srcspc,src),t2
+       ldo     16(src),src
+16:    stw,ma  t1,4(dstspc,dst)
+17:    stw,ma  t2,4(dstspc,dst)
+
+       ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
+       ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
+       ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+       b       .Lcopy_loop_4
+       ldo     -16(len),len
+
+.Lbyte_loop:
+       cmpclr,COND(<>) len,%r0,%r0
+       b,n     .Lcopy_done
+20:    ldb     0(srcspc,src),t1
+       ldo     1(src),src
+21:    stb,ma  t1,1(dstspc,dst)
+       b       .Lbyte_loop
+       ldo     -1(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_done:
+       bv      %r0(%r2)
+       sub     end,dst,ret0
+
+
+       /* src and dst are not aligned the same way. */
+       /* need to go the hard way */
+.Lunaligned_copy:
+       /* align until dst is 32bit-word-aligned */
+       extru   dst,31,2,t1
+       cmpib,COND(=),n 0,t1,.Lcopy_dstaligned
+20:    ldb     0(srcspc,src),t1
+       ldo     1(src),src
+21:    stb,ma  t1,1(dstspc,dst)
+       b       .Lunaligned_copy
+       ldo     -1(len),len
+
+       ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+       ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_dstaligned:
+
+       /* store src, dst and len in safe place */
+       copy    src,save_src
+       copy    dst,save_dst
+       copy    len,save_len
+
+       /* len now needs to give the number of words to copy */
+       SHRREG  len,2,len
+
+       /*
+        * Copy from a not-aligned src to an aligned dst using shifts.
+        * Handles 4 words per loop.
+        */
+
+       depw,z src,28,2,t0
+       subi 32,t0,t0
+       mtsar t0
+       extru len,31,2,t0
+       cmpib,= 2,t0,.Lcase2
+       /* Make src aligned by rounding it down.  */
+       depi 0,31,2,src
+
+       cmpiclr,<> 3,t0,%r0
+       b,n .Lcase3
+       cmpiclr,<> 1,t0,%r0
+       b,n .Lcase1
+.Lcase0:
+       cmpb,= %r0,len,.Lcda_finish
+       nop
+
+1:     ldw,ma 4(srcspc,src), a3
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:     ldw,ma 4(srcspc,src), a0
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       b,n .Ldo3
+.Lcase1:
+1:     ldw,ma 4(srcspc,src), a2
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:     ldw,ma 4(srcspc,src), a3
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       ldo -1(len),len
+       cmpb,=,n %r0,len,.Ldo0
+.Ldo4:
+1:     ldw,ma 4(srcspc,src), a0
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       shrpw a2, a3, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo3:
+1:     ldw,ma 4(srcspc,src), a1
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       shrpw a3, a0, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo2:
+1:     ldw,ma 4(srcspc,src), a2
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       shrpw a0, a1, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo1:
+1:     ldw,ma 4(srcspc,src), a3
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       shrpw a1, a2, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+       ldo -4(len),len
+       cmpb,<> %r0,len,.Ldo4
+       nop
+.Ldo0:
+       shrpw a2, a3, %sar, t0
+1:     stw,ma t0, 4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+
+.Lcda_rdfault:
+.Lcda_finish:
+       /* calculate new src, dst and len and jump to byte-copy loop */
+       sub     dst,save_dst,t0
+       add     save_src,t0,src
+       b       .Lbyte_loop
+       sub     save_len,t0,len
+
+.Lcase3:
+1:     ldw,ma 4(srcspc,src), a0
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:     ldw,ma 4(srcspc,src), a1
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       b .Ldo2
+       ldo 1(len),len
+.Lcase2:
+1:     ldw,ma 4(srcspc,src), a1
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1:     ldw,ma 4(srcspc,src), a2
+       ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+       b .Ldo1
+       ldo 2(len),len
+
+
+       /* fault exception fixup handlers: */
+#ifdef CONFIG_64BIT
+.Lcopy16_fault:
+10:    b       .Lcopy_done
+       std,ma  t1,8(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+#endif
+
+.Lcopy8_fault:
+10:    b       .Lcopy_done
+       stw,ma  t1,4(dstspc,dst)
+       ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+
+       .exit
+ENDPROC_CFI(pa_memcpy)
+       .procend
+
        .end
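
The .Lcase*/.Ldo* paths above implement the "shift-and-write" strategy: aligned words are loaded from the misaligned source and adjacent pairs are funnel-shifted through %sar with shrpw to produce aligned destination words. A rough C model of that merge step (32-bit words assumed; sh1 must be a nonzero byte offset expressed in bits):

    /* C model of the shrpw-style merge in the unaligned-copy loop:
     * two aligned source words yield one aligned destination word. */
    #include <stdio.h>
    #include <stdint.h>

    static uint32_t merge(uint32_t w0, uint32_t w1, unsigned int sh1)
    {
        /* sh1 = 8 * (src % 4), one of 8/16/24 for a misaligned source */
        return (w0 << sh1) | (w1 >> (32 - sh1));
    }

    int main(void)
    {
        uint32_t a = 0x00112233, b = 0x44556677;

        /* source misaligned by one byte: sh1 = 8 */
        printf("0x%08x\n", merge(a, b, 8)); /* prints 0x11223344 */
        return 0;
    }
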
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index f82ff10ed974..b3d47ec1d80a 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -2,7 +2,7 @@
  *    Optimized memory copy routines.
  *
  *    Copyright (C) 2004 Randolph Chung <[email protected]>
- *    Copyright (C) 2013 Helge Deller <[email protected]>
+ *    Copyright (C) 2013-2017 Helge Deller <[email protected]>
  *
  *    This program is free software; you can redistribute it and/or modify
  *    it under the terms of the GNU General Public License as published by
@@ -21,474 +21,21 @@
  *    Portions derived from the GNU C Library
  *    Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
  *
- * Several strategies are tried to try to get the best performance for various
- * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using 
- * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
- * general registers.  Unaligned copies are handled either by aligning the 
- * destination and then using shift-and-write method, or in a few cases by 
- * falling back to a byte-at-a-time copy.
- *
- * I chose to implement this in C because it is easier to maintain and debug,
- * and in my experiments it appears that the C code generated by gcc (3.3/3.4
- * at the time of writing) is fairly optimal. Unfortunately some of the 
- * semantics of the copy routine (exception handling) is difficult to express
- * in C, so we have to play some tricks to get it to work.
- *
- * All the loads and stores are done via explicit asm() code in order to use
- * the right space registers. 
- * 
- * Testing with various alignments and buffer sizes shows that this code is 
- * often >10x faster than a simple byte-at-a-time copy, even for strangely
- * aligned operands. It is interesting to note that the glibc version
- * of memcpy (written in C) is actually quite fast already. This routine is 
- * able to beat it by 30-40% for aligned copies because of the loop unrolling, 
- * but in some cases the glibc version is still slightly faster. This lends 
- * more credibility that gcc can generate very good code as long as we are 
- * careful.
- *
- * TODO:
- * - cache prefetching needs more experimentation to get optimal settings
- * - try not to use the post-increment address modifiers; they create additional
- *   interlocks
- * - replace byte-copy loops with stybs sequences
  */
 
-#ifdef __KERNEL__
 #include <linux/module.h>
 #include <linux/compiler.h>
 #include <linux/uaccess.h>
-#define s_space "%%sr1"
-#define d_space "%%sr2"
-#else
-#include "memcpy.h"
-#define s_space "%%sr0"
-#define d_space "%%sr0"
-#define pa_memcpy new2_copy
-#endif
 
 DECLARE_PER_CPU(struct exception_data, exception_data);
 
-#define preserve_branch(label) do {                                    \
-       volatile int dummy = 0;                                         \
-       /* The following branch is never taken, it's just here to  */   \
-       /* prevent gcc from optimizing away our exception code. */      \
-       if (unlikely(dummy != dummy))                                   \
-               goto label;                                             \
-} while (0)
-
 #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
 #define get_kernel_space() (0)
 
-#define MERGE(w0, sh_1, w1, sh_2)  ({                                  \
-       unsigned int _r;                                                \
-       asm volatile (                                                  \
-       "mtsar %3\n"                                                    \
-       "shrpw %1, %2, %%sar, %0\n"                                     \
-       : "=r"(_r)                                                      \
-       : "r"(w0), "r"(w1), "r"(sh_2)                                   \
-       );                                                              \
-       _r;                                                             \
-})
-#define THRESHOLD      16
-
-#ifdef DEBUG_MEMCPY
-#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
-#else
-#define DPRINTF(fmt, args...)
-#endif
-
-#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e)    \
-       __asm__ __volatile__ (                          \
-       "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \
-       ASM_EXCEPTIONTABLE_ENTRY(1b,_e)                 \
-       : _tt(_t), "+r"(_a)                             \
-       :                                               \
-       : "r8")
-
-#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e)   \
-       __asm__ __volatile__ (                          \
-       "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \
-       ASM_EXCEPTIONTABLE_ENTRY(1b,_e)                 \
-       : "+r"(_a)                                      \
-       : _tt(_t)                                       \
-       : "r8")
-
-#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
-#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
-#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
-#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
-#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
-#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
-
-#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e)        \
-       __asm__ __volatile__ (                          \
-       "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t"     \
-       ASM_EXCEPTIONTABLE_ENTRY(1b,_e)                 \
-       : _tt(_t)                                       \
-       : "r"(_a)                                       \
-       : "r8")
-
-#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e)       \
-       __asm__ __volatile__ (                          \
-       "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t"     \
-       ASM_EXCEPTIONTABLE_ENTRY(1b,_e)                 \
-       :                                               \
-       : _tt(_t), "r"(_a)                              \
-       : "r8")
-
-#define ldw(_s,_o,_a,_t,_e)    def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
-#define stw(_s,_t,_o,_a,_e)    def_store_insn(stw,"r",_s,_t,_o,_a,_e)
-
-#ifdef  CONFIG_PREFETCH
-static inline void prefetch_src(const void *addr)
-{
-       __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
-}
-
-static inline void prefetch_dst(const void *addr)
-{
-       __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
-}
-#else
-#define prefetch_src(addr) do { } while(0)
-#define prefetch_dst(addr) do { } while(0)
-#endif
-
-#define PA_MEMCPY_OK           0
-#define PA_MEMCPY_LOAD_ERROR   1
-#define PA_MEMCPY_STORE_ERROR  2
-
-/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
- * per loop.  This code is derived from glibc. 
- */
-static noinline unsigned long copy_dstaligned(unsigned long dst,
-                                       unsigned long src, unsigned long len)
-{
-       /* gcc complains that a2 and a3 may be uninitialized, but actually
-        * they cannot be.  Initialize a2/a3 to shut gcc up.
-        */
-       register unsigned int a0, a1, a2 = 0, a3 = 0;
-       int sh_1, sh_2;
-
-       /* prefetch_src((const void *)src); */
-
-       /* Calculate how to shift a word read at the memory operation
-          aligned srcp to make it aligned for copy.  */
-       sh_1 = 8 * (src % sizeof(unsigned int));
-       sh_2 = 8 * sizeof(unsigned int) - sh_1;
-
-       /* Make src aligned by rounding it down.  */
-       src &= -sizeof(unsigned int);
-
-       switch (len % 4)
-       {
-               case 2:
-                       /* a1 = ((unsigned int *) src)[0];
-                          a2 = ((unsigned int *) src)[1]; */
-                       ldw(s_space, 0, src, a1, cda_ldw_exc);
-                       ldw(s_space, 4, src, a2, cda_ldw_exc);
-                       src -= 1 * sizeof(unsigned int);
-                       dst -= 3 * sizeof(unsigned int);
-                       len += 2;
-                       goto do1;
-               case 3:
-                       /* a0 = ((unsigned int *) src)[0];
-                          a1 = ((unsigned int *) src)[1]; */
-                       ldw(s_space, 0, src, a0, cda_ldw_exc);
-                       ldw(s_space, 4, src, a1, cda_ldw_exc);
-                       src -= 0 * sizeof(unsigned int);
-                       dst -= 2 * sizeof(unsigned int);
-                       len += 1;
-                       goto do2;
-               case 0:
-                       if (len == 0)
-                               return PA_MEMCPY_OK;
-                       /* a3 = ((unsigned int *) src)[0];
-                          a0 = ((unsigned int *) src)[1]; */
-                       ldw(s_space, 0, src, a3, cda_ldw_exc);
-                       ldw(s_space, 4, src, a0, cda_ldw_exc);
-                       src -=-1 * sizeof(unsigned int);
-                       dst -= 1 * sizeof(unsigned int);
-                       len += 0;
-                       goto do3;
-               case 1:
-                       /* a2 = ((unsigned int *) src)[0];
-                          a3 = ((unsigned int *) src)[1]; */
-                       ldw(s_space, 0, src, a2, cda_ldw_exc);
-                       ldw(s_space, 4, src, a3, cda_ldw_exc);
-                       src -=-2 * sizeof(unsigned int);
-                       dst -= 0 * sizeof(unsigned int);
-                       len -= 1;
-                       if (len == 0)
-                               goto do0;
-                       goto do4;                       /* No-op.  */
-       }
-
-       do
-       {
-               /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
-do4:
-               /* a0 = ((unsigned int *) src)[0]; */
-               ldw(s_space, 0, src, a0, cda_ldw_exc);
-               /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
-               stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
-do3:
-               /* a1 = ((unsigned int *) src)[1]; */
-               ldw(s_space, 4, src, a1, cda_ldw_exc);
-               /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
-               stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
-do2:
-               /* a2 = ((unsigned int *) src)[2]; */
-               ldw(s_space, 8, src, a2, cda_ldw_exc);
-               /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
-               stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
-do1:
-               /* a3 = ((unsigned int *) src)[3]; */
-               ldw(s_space, 12, src, a3, cda_ldw_exc);
-               /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
-               stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
-
-               src += 4 * sizeof(unsigned int);
-               dst += 4 * sizeof(unsigned int);
-               len -= 4;
-       }
-       while (len != 0);
-
-do0:
-       /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
-       stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
-
-       preserve_branch(handle_load_error);
-       preserve_branch(handle_store_error);
-
-       return PA_MEMCPY_OK;
-
-handle_load_error:
-       __asm__ __volatile__ ("cda_ldw_exc:\n");
-       return PA_MEMCPY_LOAD_ERROR;
-
-handle_store_error:
-       __asm__ __volatile__ ("cda_stw_exc:\n");
-       return PA_MEMCPY_STORE_ERROR;
-}
-
-
-/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
- * In case of an access fault the faulty address can be read from the per_cpu
- * exception data struct. */
-static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
-                                       unsigned long len)
-{
-       register unsigned long src, dst, t1, t2, t3;
-       register unsigned char *pcs, *pcd;
-       register unsigned int *pws, *pwd;
-       register double *pds, *pdd;
-       unsigned long ret;
-
-       src = (unsigned long)srcp;
-       dst = (unsigned long)dstp;
-       pcs = (unsigned char *)srcp;
-       pcd = (unsigned char *)dstp;
-
-       /* prefetch_src((const void *)srcp); */
-
-       if (len < THRESHOLD)
-               goto byte_copy;
-
-       /* Check alignment */
-       t1 = (src ^ dst);
-       if (unlikely(t1 & (sizeof(double)-1)))
-               goto unaligned_copy;
-
-       /* src and dst have same alignment. */
-
-       /* Copy bytes till we are double-aligned. */
-       t2 = src & (sizeof(double) - 1);
-       if (unlikely(t2 != 0)) {
-               t2 = sizeof(double) - t2;
-               while (t2 && len) {
-                       /* *pcd++ = *pcs++; */
-                       ldbma(s_space, pcs, t3, pmc_load_exc);
-                       len--;
-                       stbma(d_space, t3, pcd, pmc_store_exc);
-                       t2--;
-               }
-       }
-
-       pds = (double *)pcs;
-       pdd = (double *)pcd;
-
-#if 0
-       /* Copy 8 doubles at a time */
-       while (len >= 8*sizeof(double)) {
-               register double r1, r2, r3, r4, r5, r6, r7, r8;
-               /* prefetch_src((char *)pds + L1_CACHE_BYTES); */
-               flddma(s_space, pds, r1, pmc_load_exc);
-               flddma(s_space, pds, r2, pmc_load_exc);
-               flddma(s_space, pds, r3, pmc_load_exc);
-               flddma(s_space, pds, r4, pmc_load_exc);
-               fstdma(d_space, r1, pdd, pmc_store_exc);
-               fstdma(d_space, r2, pdd, pmc_store_exc);
-               fstdma(d_space, r3, pdd, pmc_store_exc);
-               fstdma(d_space, r4, pdd, pmc_store_exc);
-
-#if 0
-               if (L1_CACHE_BYTES <= 32)
-                       prefetch_src((char *)pds + L1_CACHE_BYTES);
-#endif
-               flddma(s_space, pds, r5, pmc_load_exc);
-               flddma(s_space, pds, r6, pmc_load_exc);
-               flddma(s_space, pds, r7, pmc_load_exc);
-               flddma(s_space, pds, r8, pmc_load_exc);
-               fstdma(d_space, r5, pdd, pmc_store_exc);
-               fstdma(d_space, r6, pdd, pmc_store_exc);
-               fstdma(d_space, r7, pdd, pmc_store_exc);
-               fstdma(d_space, r8, pdd, pmc_store_exc);
-               len -= 8*sizeof(double);
-       }
-#endif
-
-       pws = (unsigned int *)pds;
-       pwd = (unsigned int *)pdd;
-
-word_copy:
-       while (len >= 8*sizeof(unsigned int)) {
-               register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
-               /* prefetch_src((char *)pws + L1_CACHE_BYTES); */
-               ldwma(s_space, pws, r1, pmc_load_exc);
-               ldwma(s_space, pws, r2, pmc_load_exc);
-               ldwma(s_space, pws, r3, pmc_load_exc);
-               ldwma(s_space, pws, r4, pmc_load_exc);
-               stwma(d_space, r1, pwd, pmc_store_exc);
-               stwma(d_space, r2, pwd, pmc_store_exc);
-               stwma(d_space, r3, pwd, pmc_store_exc);
-               stwma(d_space, r4, pwd, pmc_store_exc);
-
-               ldwma(s_space, pws, r5, pmc_load_exc);
-               ldwma(s_space, pws, r6, pmc_load_exc);
-               ldwma(s_space, pws, r7, pmc_load_exc);
-               ldwma(s_space, pws, r8, pmc_load_exc);
-               stwma(d_space, r5, pwd, pmc_store_exc);
-               stwma(d_space, r6, pwd, pmc_store_exc);
-               stwma(d_space, r7, pwd, pmc_store_exc);
-               stwma(d_space, r8, pwd, pmc_store_exc);
-               len -= 8*sizeof(unsigned int);
-       }
-
-       while (len >= 4*sizeof(unsigned int)) {
-               register unsigned int r1,r2,r3,r4;
-               ldwma(s_space, pws, r1, pmc_load_exc);
-               ldwma(s_space, pws, r2, pmc_load_exc);
-               ldwma(s_space, pws, r3, pmc_load_exc);
-               ldwma(s_space, pws, r4, pmc_load_exc);
-               stwma(d_space, r1, pwd, pmc_store_exc);
-               stwma(d_space, r2, pwd, pmc_store_exc);
-               stwma(d_space, r3, pwd, pmc_store_exc);
-               stwma(d_space, r4, pwd, pmc_store_exc);
-               len -= 4*sizeof(unsigned int);
-       }
-
-       pcs = (unsigned char *)pws;
-       pcd = (unsigned char *)pwd;
-
-byte_copy:
-       while (len) {
-               /* *pcd++ = *pcs++; */
-               ldbma(s_space, pcs, t3, pmc_load_exc);
-               stbma(d_space, t3, pcd, pmc_store_exc);
-               len--;
-       }
-
-       return PA_MEMCPY_OK;
-
-unaligned_copy:
-       /* possibly we are aligned on a word, but not on a double... */
-       if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
-               t2 = src & (sizeof(unsigned int) - 1);
-
-               if (unlikely(t2 != 0)) {
-                       t2 = sizeof(unsigned int) - t2;
-                       while (t2) {
-                               /* *pcd++ = *pcs++; */
-                               ldbma(s_space, pcs, t3, pmc_load_exc);
-                               stbma(d_space, t3, pcd, pmc_store_exc);
-                               len--;
-                               t2--;
-                       }
-               }
-
-               pws = (unsigned int *)pcs;
-               pwd = (unsigned int *)pcd;
-               goto word_copy;
-       }
-
-       /* Align the destination.  */
-       if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
-               t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
-               while (t2) {
-                       /* *pcd++ = *pcs++; */
-                       ldbma(s_space, pcs, t3, pmc_load_exc);
-                       stbma(d_space, t3, pcd, pmc_store_exc);
-                       len--;
-                       t2--;
-               }
-               dst = (unsigned long)pcd;
-               src = (unsigned long)pcs;
-       }
-
-       ret = copy_dstaligned(dst, src, len / sizeof(unsigned int));
-       if (ret)
-               return ret;
-
-       pcs += (len & -sizeof(unsigned int));
-       pcd += (len & -sizeof(unsigned int));
-       len %= sizeof(unsigned int);
-
-       preserve_branch(handle_load_error);
-       preserve_branch(handle_store_error);
-
-       goto byte_copy;
-
-handle_load_error:
-       __asm__ __volatile__ ("pmc_load_exc:\n");
-       return PA_MEMCPY_LOAD_ERROR;
-
-handle_store_error:
-       __asm__ __volatile__ ("pmc_store_exc:\n");
-       return PA_MEMCPY_STORE_ERROR;
-}
-
-
 /* Returns 0 for success, otherwise, returns number of bytes not transferred. */
-static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
-{
-       unsigned long ret, fault_addr, reference;
-       struct exception_data *d;
-
-       ret = pa_memcpy_internal(dstp, srcp, len);
-       if (likely(ret == PA_MEMCPY_OK))
-               return 0;
-
-       /* if a load or store fault occured we can get the faulty addr */
-       d = this_cpu_ptr(&exception_data);
-       fault_addr = d->fault_addr;
-
-       /* error in load or store? */
-       if (ret == PA_MEMCPY_LOAD_ERROR)
-               reference = (unsigned long) srcp;
-       else
-               reference = (unsigned long) dstp;
+extern unsigned long pa_memcpy(void *dst, const void *src,
+                               unsigned long len);
 
-       DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n",
-               ret, len, fault_addr, reference);
-
-       if (fault_addr >= reference)
-               return len - (fault_addr - reference);
-       else
-               return len;
-}
-
-#ifdef __KERNEL__
 unsigned long __copy_to_user(void __user *dst, const void *src,
                             unsigned long len)
 {
@@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size)
 
        return __probe_kernel_read(dst, src, size);
 }
-
-#endif
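
The assembly pa_memcpy() keeps the contract documented above: it returns the number of bytes that could not be copied, zero on success. A hedged caller-side sketch of that interface (pa_memcpy stubbed here with plain memcpy, so it never actually faults):

    /* Sketch of the pa_memcpy() return contract: the result is the
     * count of bytes NOT copied; 0 means complete success. */
    #include <stdio.h>
    #include <string.h>

    static unsigned long pa_memcpy_stub(void *dst, const void *src,
                                        unsigned long len)
    {
        memcpy(dst, src, len); /* the stub cannot fault */
        return 0;              /* everything copied */
    }

    int main(void)
    {
        char src[16] = "hello, parisc!";
        char dst[16];
        unsigned long left = pa_memcpy_stub(dst, src, sizeof(src));

        if (left)
            printf("short copy: %lu bytes missing\n", left);
        else
            printf("copied: %s\n", dst);
        return 0;
    }
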
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 1a0b4f63f0e9..040c48fc5391 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -149,6 +149,23 @@ int fixup_exception(struct pt_regs *regs)
                d->fault_space = regs->isr;
                d->fault_addr = regs->ior;
 
+               /*
+                * Fix up get_user() and put_user().
+                * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
+                * bit in the relative address of the fixup routine to indicate
+                * that %r8 should be loaded with -EFAULT to report a userspace
+                * access error.
+                */
+               if (fix->fixup & 1) {
+                       regs->gr[8] = -EFAULT;
+
+                       /* zero target register for get_user() */
+                       if (parisc_acctyp(0, regs->iir) == VM_READ) {
+                               int treg = regs->iir & 0x1f;
+                               regs->gr[treg] = 0;
+                       }
+               }
+
                regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
                regs->iaoq[0] &= ~3;
                /*
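
The "regs->iir & 0x1f" above relies on parisc load instructions keeping their target register number in the low five bits of the instruction word saved in the IIR. A tiny sketch of that field extraction (the instruction word below is a made-up value, not a real encoding):

    /* Sketch of extracting a 5-bit target-register field, as the
     * fixup code does with regs->iir & 0x1f; the iir value is made up. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int iir = 0x0cc11093; /* hypothetical load instruction */
        int treg = iir & 0x1f;         /* low 5 bits: target register */

        printf("would zero r%d and set r8 = -EFAULT\n", treg);
        return 0;
    }
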
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 779782f58324..9a53a06e5a3e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
        _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-       _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+       _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 887e57182716..aed206475aa7 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
 #if defined(CONFIG_X86_ESPFIX64)
 static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
 #elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_START;
+static const unsigned long vaddr_end = EFI_VA_END;
 #else
 static const unsigned long vaddr_end = __START_KERNEL_map;
 #endif
@@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void)
         */
        BUILD_BUG_ON(vaddr_start >= vaddr_end);
        BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
-                    vaddr_end >= EFI_VA_START);
+                    vaddr_end >= EFI_VA_END);
        BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
                      IS_ENABLED(CONFIG_EFI)) &&
                     vaddr_end >= __START_KERNEL_map);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f8960fca0827..9f21b0c5945d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -713,10 +713,9 @@ static void __init xen_reserve_xen_mfnlist(void)
                size = PFN_PHYS(xen_start_info->nr_p2m_frames);
        }
 
-       if (!xen_is_e820_reserved(start, size)) {
-               memblock_reserve(start, size);
+       memblock_reserve(start, size);
+       if (!xen_is_e820_reserved(start, size))
                return;
-       }
 
 #ifdef CONFIG_X86_32
        /*
@@ -727,6 +726,7 @@ static void __init xen_reserve_xen_mfnlist(void)
        BUG();
 #else
        xen_relocate_p2m();
+       memblock_free(start, size);
 #endif
 }
 
diff --git a/block/bio.c b/block/bio.c
index db85c5753a76..655c9016052a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -372,10 +372,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
        bio_list_init(&punt);
        bio_list_init(&nopunt);
 
-       while ((bio = bio_list_pop(current->bio_list)))
+       while ((bio = bio_list_pop(&current->bio_list[0])))
                bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+       current->bio_list[0] = nopunt;
 
-       *current->bio_list = nopunt;
+       bio_list_init(&nopunt);
+       while ((bio = bio_list_pop(&current->bio_list[1])))
+               bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+       current->bio_list[1] = nopunt;
 
        spin_lock(&bs->rescue_lock);
        bio_list_merge(&bs->rescue_list, &punt);
@@ -462,7 +466,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
                 * we retry with the original gfp_flags.
                 */
 
-               if (current->bio_list && !bio_list_empty(current->bio_list))
+               if (current->bio_list &&
+                   (!bio_list_empty(&current->bio_list[0]) ||
+                    !bio_list_empty(&current->bio_list[1])))
                        gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
                p = mempool_alloc(bs->bio_pool, gfp_mask);
diff --git a/block/blk-core.c b/block/blk-core.c
index 14d7c0740dc0..d1f2801ce836 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1994,7 +1994,14 @@ generic_make_request_checks(struct bio *bio)
  */
 blk_qc_t generic_make_request(struct bio *bio)
 {
-       struct bio_list bio_list_on_stack;
+       /*
+        * bio_list_on_stack[0] contains bios submitted by the current
+        * make_request_fn.
+        * bio_list_on_stack[1] contains bios that were submitted before
+        * the current make_request_fn, but that haven't been processed
+        * yet.
+        */
+       struct bio_list bio_list_on_stack[2];
        blk_qc_t ret = BLK_QC_T_NONE;
 
        if (!generic_make_request_checks(bio))
@@ -2011,7 +2018,7 @@ blk_qc_t generic_make_request(struct bio *bio)
         * should be added at the tail
         */
        if (current->bio_list) {
-               bio_list_add(current->bio_list, bio);
+               bio_list_add(&current->bio_list[0], bio);
                goto out;
        }
 
@@ -2030,23 +2037,39 @@ blk_qc_t generic_make_request(struct bio *bio)
         * bio_list, and call into ->make_request() again.
         */
        BUG_ON(bio->bi_next);
-       bio_list_init(&bio_list_on_stack);
-       current->bio_list = &bio_list_on_stack;
+       bio_list_init(&bio_list_on_stack[0]);
+       current->bio_list = bio_list_on_stack;
        do {
                struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
                if (likely(blk_queue_enter(q, false) == 0)) {
+                       struct bio_list lower, same;
+
+                       /* Create a fresh bio_list for all subordinate requests */
+                       bio_list_on_stack[1] = bio_list_on_stack[0];
+                       bio_list_init(&bio_list_on_stack[0]);
                        ret = q->make_request_fn(q, bio);
 
                        blk_queue_exit(q);
 
-                       bio = bio_list_pop(current->bio_list);
+                       /* sort new bios into those for a lower level
+                        * and those for the same level
+                        */
+                       bio_list_init(&lower);
+                       bio_list_init(&same);
+                       while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+                               if (q == bdev_get_queue(bio->bi_bdev))
+                                       bio_list_add(&same, bio);
+                               else
+                                       bio_list_add(&lower, bio);
+                       /* now assemble so we handle the lowest level first */
+                       bio_list_merge(&bio_list_on_stack[0], &lower);
+                       bio_list_merge(&bio_list_on_stack[0], &same);
+                       bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
                } else {
-                       struct bio *bio_next = bio_list_pop(current->bio_list);
-
                        bio_io_error(bio);
-                       bio = bio_next;
                }
+               bio = bio_list_pop(&bio_list_on_stack[0]);
        } while (bio);
        current->bio_list = NULL; /* deactivate */
 
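The new draining order above is the point of the change: bios emitted while servicing one queue are sorted so that lower-level devices are handled before more work for the same level, which keeps recursion depth bounded for stacked devices. A toy model of that sorting (plain arrays stand in for struct bio_list, queue ids for request_queues):

    /* Toy model of the lower/same split in generic_make_request():
     * lower-level bios are dispatched before same-level ones. */
    #include <stdio.h>

    int main(void)
    {
        int current_q = 1;            /* queue being serviced */
        int emitted[] = { 1, 0, 1 };  /* bios its make_request_fn made */
        int lower[8], same[8];
        int nl = 0, ns = 0, i;

        for (i = 0; i < 3; i++) {
            if (emitted[i] == current_q)
                same[ns++] = emitted[i];
            else
                lower[nl++] = emitted[i];
        }

        for (i = 0; i < nl; i++)      /* lowest level first */
            printf("dispatch bio for queue %d\n", lower[i]);
        for (i = 0; i < ns; i++)
            printf("dispatch bio for queue %d\n", same[i]);
        return 0;
    }
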
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 9ed087853dee..4c5678cfa9c4 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -2,7 +2,6 @@
 # Makefile for the Linux ACPI interpreter
 #
 
-ccflags-y                      := -Os
 ccflags-$(CONFIG_ACPI_DEBUG)   += -DACPI_DEBUG_OUTPUT
 
 #
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index b4c1a6a51da4..03250e1f1103 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -25,9 +25,11 @@
 ACPI_MODULE_NAME("platform");
 
 static const struct acpi_device_id forbidden_id_list[] = {
-       {"PNP0000", 0}, /* PIC */
-       {"PNP0100", 0}, /* Timer */
-       {"PNP0200", 0}, /* AT DMA Controller */
+       {"PNP0000",  0},        /* PIC */
+       {"PNP0100",  0},        /* Timer */
+       {"PNP0200",  0},        /* AT DMA Controller */
+       {"ACPI0009", 0},        /* IOxAPIC */
+       {"ACPI000A", 0},        /* IOAPIC */
        {"", 0},
 };
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index b1254f885fed..b87d27859141 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1299,6 +1299,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
                goto out_pm_put;
        }
 
+       mutex_lock(&gpu->lock);
+
        fence = etnaviv_gpu_fence_alloc(gpu);
        if (!fence) {
                event_free(gpu, event);
@@ -1306,8 +1308,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
                goto out_pm_put;
        }
 
-       mutex_lock(&gpu->lock);
-
        gpu->event[event].fence = fence;
        submit->fence = fence->seqno;
        gpu->active_fence = submit->fence;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 3de5e6e21662..4ce04e06d9ac 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
                        rbo->placement.num_busy_placement = 0;
                        for (i = 0; i < rbo->placement.num_placement; i++) {
                                if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
-                                       if (rbo->placements[0].fpfn < fpfn)
-                                               rbo->placements[0].fpfn = fpfn;
+                                       if (rbo->placements[i].fpfn < fpfn)
+                                               rbo->placements[i].fpfn = fpfn;
                                } else {
                                        rbo->placement.busy_placement =
                                                &rbo->placements[i];
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 7aadce1f7e7a..c7e6c9839c9a 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -842,6 +842,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
        drm_atomic_helper_crtc_destroy_state(crtc, state);
 }
 
+static void
+vc4_crtc_reset(struct drm_crtc *crtc)
+{
+       if (crtc->state)
+               __drm_atomic_helper_crtc_destroy_state(crtc->state);
+
+       crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
+       if (crtc->state)
+               crtc->state->crtc = crtc;
+}
+
 static const struct drm_crtc_funcs vc4_crtc_funcs = {
        .set_config = drm_atomic_helper_set_config,
        .destroy = vc4_crtc_destroy,
@@ -849,7 +860,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
        .set_property = NULL,
        .cursor_set = NULL, /* handled by drm_mode_cursor_universal */
        .cursor_move = NULL, /* handled by drm_mode_cursor_universal */
-       .reset = drm_atomic_helper_crtc_reset,
+       .reset = vc4_crtc_reset,
        .atomic_duplicate_state = vc4_crtc_duplicate_state,
        .atomic_destroy_state = vc4_crtc_destroy_state,
        .gamma_set = vc4_crtc_gamma_set,
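
vc4 embeds its CRTC state in a larger vc4_crtc_state, so the generic drm_atomic_helper_crtc_reset() allocated an object that was too small and the driver-private fields sat out of bounds. The subclass-aware reset pattern, reduced to toy types (hypothetical names, not the DRM API):

#include <stdlib.h>

struct base_state { void *owner; };

struct sub_state {
	struct base_state base;		/* must stay first */
	int extra;			/* driver-private data */
};

/*
 * Reset has to allocate the subclass size; a generic helper that
 * allocates only sizeof(struct base_state) leaves ->extra pointing
 * into memory that was never reserved.
 */
static void sub_reset(struct base_state **statep, void *owner)
{
	free(*statep);
	*statep = calloc(1, sizeof(struct sub_state));
	if (*statep)
		(*statep)->owner = owner;
}
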
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 5e7a5648e708..0c535d0f3b95 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2017,6 +2017,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
 
        wacom_update_name(wacom, wireless ? " (WL)" : "");
 
+       /* pen-only Bamboo models support neither touch nor pad */
+       if ((features->type == BAMBOO_PEN) &&
+           ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
+           (features->device_type & WACOM_DEVICETYPE_PAD))) {
+               error = -ENODEV;
+               goto fail;
+       }
+
        error = wacom_add_shared_data(hdev);
        if (error)
                goto fail;
@@ -2064,14 +2072,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
                goto fail_quirks;
        }
 
-       /* pen only Bamboo neither support touch nor pad */
-       if ((features->type == BAMBOO_PEN) &&
-           ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
-           (features->device_type & WACOM_DEVICETYPE_PAD))) {
-               error = -ENODEV;
-               goto fail_quirks;
-       }
-
        if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR)
                error = hid_hw_open(hdev);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 628ba001bb3c..e66f4040d84b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -986,26 +986,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
        struct dm_offload *o = container_of(cb, struct dm_offload, cb);
        struct bio_list list;
        struct bio *bio;
+       int i;
 
        INIT_LIST_HEAD(&o->cb.list);
 
        if (unlikely(!current->bio_list))
                return;
 
-       list = *current->bio_list;
-       bio_list_init(current->bio_list);
-
-       while ((bio = bio_list_pop(&list))) {
-               struct bio_set *bs = bio->bi_pool;
-               if (unlikely(!bs) || bs == fs_bio_set) {
-                       bio_list_add(current->bio_list, bio);
-                       continue;
+       for (i = 0; i < 2; i++) {
+               list = current->bio_list[i];
+               bio_list_init(&current->bio_list[i]);
+
+               while ((bio = bio_list_pop(&list))) {
+                       struct bio_set *bs = bio->bi_pool;
+                       if (unlikely(!bs) || bs == fs_bio_set) {
+                               bio_list_add(&current->bio_list[i], bio);
+                               continue;
+                       }
+
+                       spin_lock(&bs->rescue_lock);
+                       bio_list_add(&bs->rescue_list, bio);
+                       queue_work(bs->rescue_workqueue, &bs->rescue_work);
+                       spin_unlock(&bs->rescue_lock);
                }
-
-               spin_lock(&bs->rescue_lock);
-               bio_list_add(&bs->rescue_list, bio);
-               queue_work(bs->rescue_workqueue, &bs->rescue_work);
-               spin_unlock(&bs->rescue_lock);
        }
 }
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 55b5e0e77b17..4c4aab02e311 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -941,7 +941,8 @@ static void wait_barrier(struct r10conf *conf)
                                    !conf->barrier ||
                                    (atomic_read(&conf->nr_pending) &&
                                     current->bio_list &&
-                                    !bio_list_empty(current->bio_list)),
+                                    (!bio_list_empty(&current->bio_list[0]) ||
+                                     !bio_list_empty(&current->bio_list[1]))),
                                    conf->resync_lock);
                conf->nr_waiting--;
                if (!conf->nr_waiting)
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 387ae1cbf698..a8b430ff117b 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -29,6 +29,8 @@
 
 #include "sdhci-pltfm.h"
 
+#define SDMMC_MC1R     0x204
+#define                SDMMC_MC1R_DDR          BIT(3)
 #define SDMMC_CACR     0x230
 #define                SDMMC_CACR_CAPWREN      BIT(0)
 #define                SDMMC_CACR_KEY          (0x46 << 8)
@@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode,
        sdhci_set_power_noreg(host, mode, vdd);
 }
 
+static void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing)
+{
+       if (timing == MMC_TIMING_MMC_DDR52)
+               sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R);
+       sdhci_set_uhs_signaling(host, timing);
+}
+
 static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
        .set_clock              = sdhci_at91_set_clock,
        .set_bus_width          = sdhci_set_bus_width,
        .reset                  = sdhci_reset,
-       .set_uhs_signaling      = sdhci_set_uhs_signaling,
+       .set_uhs_signaling      = sdhci_at91_set_uhs_signaling,
        .set_power              = sdhci_at91_set_power,
 };
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index a983ba0349fb..7d275e72903a 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1823,6 +1823,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
        struct sdhci_host *host = mmc_priv(mmc);
        unsigned long flags;
 
+       if (enable)
+               pm_runtime_get_noresume(host->mmc->parent);
+
        spin_lock_irqsave(&host->lock, flags);
        if (enable)
                host->flags |= SDHCI_SDIO_IRQ_ENABLED;
@@ -1831,6 +1834,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
 
        sdhci_enable_sdio_irq_nolock(host, enable);
        spin_unlock_irqrestore(&host->lock, flags);
+
+       if (!enable)
+               pm_runtime_put_noidle(host->mmc->parent);
 }
 
 static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
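
The sdhci hunk above pins the parent device with pm_runtime_get_noresume() for as long as the SDIO IRQ is enabled and drops the count again on disable, so runtime suspend cannot race with card interrupt delivery. The pairing, sketched with a bare usage counter rather than the real runtime-PM API:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int usage_count;

static bool may_suspend(void)
{
	return atomic_load(&usage_count) == 0;
}

static void sdio_irq_set_enabled(bool enable)
{
	if (enable)
		atomic_fetch_add(&usage_count, 1);	/* get_noresume */

	/* ... program the controller under the host lock ... */

	if (!enable)
		atomic_fetch_sub(&usage_count, 1);	/* put_noidle */
}
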
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index da10b484bd25..bde769b11e3b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2057,9 +2057,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
                 * Revalidating a dead namespace sets capacity to 0. This will
                 * end buffered writers dirtying pages that can't be synced.
                 */
-               if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
-                       revalidate_disk(ns->disk);
-
+               if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+                       continue;
+               revalidate_disk(ns->disk);
                blk_set_queue_dying(ns->queue);
                blk_mq_abort_requeue_list(ns->queue);
                blk_mq_start_stopped_hw_queues(ns->queue, true);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5e52034ab010..8a9c186898c7 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1983,8 +1983,10 @@ static void nvme_remove(struct pci_dev *pdev)
 
        pci_set_drvdata(pdev, NULL);
 
-       if (!pci_device_is_present(pdev))
+       if (!pci_device_is_present(pdev)) {
                nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+               nvme_dev_disable(dev, false);
+       }
 
        flush_work(&dev->reset_work);
        nvme_uninit_ctrl(&dev->ctrl);
diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c
index 8ce089043a27..46ca8ed031fe 100644
--- a/drivers/pci/host/pcie-iproc-bcma.c
+++ b/drivers/pci/host/pcie-iproc-bcma.c
@@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
 {
        struct device *dev = &bdev->dev;
        struct iproc_pcie *pcie;
-       LIST_HEAD(res);
-       struct resource res_mem;
+       LIST_HEAD(resources);
        int ret;
 
        pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
@@ -62,22 +61,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
 
        pcie->base_addr = bdev->addr;
 
-       res_mem.start = bdev->addr_s[0];
-       res_mem.end = bdev->addr_s[0] + SZ_128M - 1;
-       res_mem.name = "PCIe MEM space";
-       res_mem.flags = IORESOURCE_MEM;
-       pci_add_resource(&res, &res_mem);
+       pcie->mem.start = bdev->addr_s[0];
+       pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1;
+       pcie->mem.name = "PCIe MEM space";
+       pcie->mem.flags = IORESOURCE_MEM;
+       pci_add_resource(&resources, &pcie->mem);
 
        pcie->map_irq = iproc_pcie_bcma_map_irq;
 
-       ret = iproc_pcie_setup(pcie, &res);
-       if (ret)
+       ret = iproc_pcie_setup(pcie, &resources);
+       if (ret) {
                dev_err(dev, "PCIe controller setup failed\n");
-
-       pci_free_resource_list(&res);
+               pci_free_resource_list(&resources);
+               return ret;
+       }
 
        bcma_set_drvdata(bdev, pcie);
-       return ret;
+       return 0;
 }
 
 static void iproc_pcie_bcma_remove(struct bcma_device *bdev)
diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
index a3de087976b3..7dcaddcd2f16 100644
--- a/drivers/pci/host/pcie-iproc-platform.c
+++ b/drivers/pci/host/pcie-iproc-platform.c
@@ -46,7 +46,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
        struct device_node *np = dev->of_node;
        struct resource reg;
        resource_size_t iobase = 0;
-       LIST_HEAD(res);
+       LIST_HEAD(resources);
        int ret;
 
        of_id = of_match_device(iproc_pcie_of_match_table, dev);
@@ -108,23 +108,24 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
                pcie->phy = NULL;
        }
 
-       ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase);
+       ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources,
+                                              &iobase);
        if (ret) {
-               dev_err(dev,
-                       "unable to get PCI host bridge resources\n");
+               dev_err(dev, "unable to get PCI host bridge resources\n");
                return ret;
        }
 
        pcie->map_irq = of_irq_parse_and_map_pci;
 
-       ret = iproc_pcie_setup(pcie, &res);
-       if (ret)
+       ret = iproc_pcie_setup(pcie, &resources);
+       if (ret) {
                dev_err(dev, "PCIe controller setup failed\n");
-
-       pci_free_resource_list(&res);
+               pci_free_resource_list(&resources);
+               return ret;
+       }
 
        platform_set_drvdata(pdev, pcie);
-       return ret;
+       return 0;
 }
 
 static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
index e84d93c53c7b..fa4226742bcd 100644
--- a/drivers/pci/host/pcie-iproc.h
+++ b/drivers/pci/host/pcie-iproc.h
@@ -68,6 +68,7 @@ struct iproc_pcie {
 #ifdef CONFIG_ARM
        struct pci_sys_data sysdata;
 #endif
+       struct resource mem;
        struct pci_bus *root_bus;
        struct phy *phy;
        int (*map_irq)(const struct pci_dev *, u8, u8);
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 7bb20684e9fa..d3145799b92f 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -113,7 +113,7 @@ struct alua_queue_data {
 #define ALUA_POLICY_SWITCH_ALL         1
 
 static void alua_rtpg_work(struct work_struct *work);
-static void alua_rtpg_queue(struct alua_port_group *pg,
+static bool alua_rtpg_queue(struct alua_port_group *pg,
                            struct scsi_device *sdev,
                            struct alua_queue_data *qdata, bool force);
 static void alua_check(struct scsi_device *sdev, bool force);
@@ -862,7 +862,13 @@ static void alua_rtpg_work(struct work_struct *work)
        kref_put(&pg->kref, release_port_group);
 }
 
-static void alua_rtpg_queue(struct alua_port_group *pg,
+/**
+ * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
+ *
+ * Returns true if and only if alua_rtpg_work() will be called asynchronously.
+ * That function is responsible for calling @qdata->fn().
+ */
+static bool alua_rtpg_queue(struct alua_port_group *pg,
                            struct scsi_device *sdev,
                            struct alua_queue_data *qdata, bool force)
 {
@@ -870,8 +876,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
        unsigned long flags;
        struct workqueue_struct *alua_wq = kaluad_wq;
 
-       if (!pg)
-               return;
+       if (!pg || scsi_device_get(sdev))
+               return false;
 
        spin_lock_irqsave(&pg->lock, flags);
        if (qdata) {
@@ -884,14 +890,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
                pg->flags |= ALUA_PG_RUN_RTPG;
                kref_get(&pg->kref);
                pg->rtpg_sdev = sdev;
-               scsi_device_get(sdev);
                start_queue = 1;
        } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
                pg->flags |= ALUA_PG_RUN_RTPG;
                /* Do not queue if the worker is already running */
                if (!(pg->flags & ALUA_PG_RUNNING)) {
                        kref_get(&pg->kref);
-                       sdev = NULL;
                        start_queue = 1;
                }
        }
@@ -900,13 +904,17 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
                alua_wq = kaluad_sync_wq;
        spin_unlock_irqrestore(&pg->lock, flags);
 
-       if (start_queue &&
-           !queue_delayed_work(alua_wq, &pg->rtpg_work,
-                               msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
-               if (sdev)
-                       scsi_device_put(sdev);
-               kref_put(&pg->kref, release_port_group);
+       if (start_queue) {
+               if (queue_delayed_work(alua_wq, &pg->rtpg_work,
+                               msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
+                       sdev = NULL;
+               else
+                       kref_put(&pg->kref, release_port_group);
        }
+       if (sdev)
+               scsi_device_put(sdev);
+
+       return true;
 }
 
 /*
@@ -1007,11 +1015,13 @@ static int alua_activate(struct scsi_device *sdev,
                mutex_unlock(&h->init_mutex);
                goto out;
        }
-       fn = NULL;
        rcu_read_unlock();
        mutex_unlock(&h->init_mutex);
 
-       alua_rtpg_queue(pg, sdev, qdata, true);
+       if (alua_rtpg_queue(pg, sdev, qdata, true))
+               fn = NULL;
+       else
+               err = SCSI_DH_DEV_OFFLINED;
        kref_put(&pg->kref, release_port_group);
 out:
        if (fn)
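
With this change alua_rtpg_queue() takes the scsi_device reference up front, and its bool return tells the caller whether alua_rtpg_work() will actually run, i.e. who ends up invoking qdata->fn() and releasing the reference. A reduced sketch of that ownership handshake (toy refcount, hypothetical names):

#include <stdbool.h>

struct dev { int refs; };

/* Stand-ins for scsi_device_get()/scsi_device_put(); the real get can fail. */
static bool dev_get(struct dev *d) { d->refs++; return true; }
static void dev_put(struct dev *d) { d->refs--; }

/* Returns true iff the worker now owns @d and will drop the reference. */
static bool queue_rtpg(struct dev *d, bool (*queue_work)(struct dev *))
{
	if (!dev_get(d))
		return false;
	if (queue_work(d))
		return true;	/* worker releases the reference */
	dev_put(d);		/* queueing failed: release it here */
	return false;
}
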
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 763f012fdeca..87f5e694dbed 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
                task->num_scatter = qc->n_elem;
        } else {
                for_each_sg(qc->sg, sg, qc->n_elem, si)
-                       xfer += sg->length;
+                       xfer += sg_dma_len(sg);
 
                task->total_xfer_len = xfer;
                task->num_scatter = si;
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index fe7469c901f7..ad33238cef17 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2153,8 +2153,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
                    "Timer for the VP[%d] has stopped\n", vha->vp_idx);
        }
 
-       BUG_ON(atomic_read(&vha->vref_count));
-
        qla2x00_free_fcports(vha);
 
        mutex_lock(&ha->vport_lock);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 73b12e41d992..8e63a7b90277 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3742,6 +3742,7 @@ typedef struct scsi_qla_host {
        struct qla8044_reset_template reset_tmplt;
        struct qla_tgt_counters tgt_counters;
        uint16_t        bbcr;
+       wait_queue_head_t vref_waitq;
 } scsi_qla_host_t;
 
 struct qla27xx_image_status {
@@ -3780,6 +3781,7 @@ struct qla_tgt_vp_map {
        mb();                                                \
        if (__vha->flags.delete_progress) {                  \
                atomic_dec(&__vha->vref_count);              \
+               wake_up(&__vha->vref_waitq);            \
                __bail = 1;                                  \
        } else {                                             \
                __bail = 0;                                  \
@@ -3788,6 +3790,7 @@ struct qla_tgt_vp_map {
 
 #define QLA_VHA_MARK_NOT_BUSY(__vha) do {                   \
        atomic_dec(&__vha->vref_count);                      \
+       wake_up(&__vha->vref_waitq);                    \
 } while (0)
 
 /*
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 5b09296b46a3..8f12f6baa6b8 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -4356,6 +4356,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha)
                        }
                }
                atomic_dec(&vha->vref_count);
+               wake_up(&vha->vref_waitq);
        }
        spin_unlock_irqrestore(&ha->vport_slock, flags);
 }
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index cf7ba52bae66..3dfb54abc874 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
         * ensures no active vp_list traversal while the vport is removed
         * from the queue)
         */
-       spin_lock_irqsave(&ha->vport_slock, flags);
-       while (atomic_read(&vha->vref_count)) {
-               spin_unlock_irqrestore(&ha->vport_slock, flags);
-
-               msleep(500);
+       wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count),
+           10*HZ);
 
-               spin_lock_irqsave(&ha->vport_slock, flags);
+       spin_lock_irqsave(&ha->vport_slock, flags);
+       if (atomic_read(&vha->vref_count)) {
+               ql_dbg(ql_dbg_vport, vha, 0xfffa,
+                   "vha->vref_count=%u timeout\n", vha->vref_count.counter);
+               vha->vref_count = (atomic_t)ATOMIC_INIT(0);
        }
        list_del(&vha->list);
        qlt_update_vp_map(vha, RESET_VP_IDX);
@@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb)
 
                        spin_lock_irqsave(&ha->vport_slock, flags);
                        atomic_dec(&vha->vref_count);
+                       wake_up(&vha->vref_waitq);
                }
                i++;
        }
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index bea819e5336d..4f361d8d84be 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -4045,6 +4045,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht,
 
        spin_lock_init(&vha->work_lock);
        spin_lock_init(&vha->cmd_list_lock);
+       init_waitqueue_head(&vha->vref_waitq);
 
        sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no);
        ql_dbg(ql_dbg_init, vha, 0x0041,
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 121de0aaa6ad..f753df25ba34 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -998,6 +998,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
                result = get_user(val, ip);
                if (result)
                        return result;
+               if (val > SG_MAX_CDB_SIZE)
+                       return -ENOMEM;
                sfp->next_cmd_len = (val > 0) ? val : 0;
                return 0;
        case SG_GET_VERSION_NUM:
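
The sg hunk validates the user-supplied command length before caching it, rejecting anything above SG_MAX_CDB_SIZE. The shape of the check in isolation (the limit value below is an illustrative stand-in):

#include <errno.h>

#define MAX_CDB_SIZE 252	/* illustrative stand-in for SG_MAX_CDB_SIZE */

static int set_next_cmd_len(int val, unsigned char *next_cmd_len)
{
	if (val > MAX_CDB_SIZE)
		return -ENOMEM;		/* mirrors the kernel's choice of errno */
	*next_cmd_len = (val > 0) ? val : 0;
	return 0;
}
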
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index fabbe76203bb..4d079cdaa7a3 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1938,6 +1938,11 @@ static void atmel_flush_buffer(struct uart_port *port)
                atmel_uart_writel(port, ATMEL_PDC_TCR, 0);
                atmel_port->pdc_tx.ofs = 0;
        }
+       /*
+        * in uart_flush_buffer(), the xmit circular buffer has just
+        * been cleared, so we have to reset tx_len accordingly.
+        */
+       atmel_port->tx_len = 0;
 }
 
 /*
@@ -2471,6 +2476,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count)
        pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN;
        atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
 
+       /* Make sure that tx path is actually able to send characters */
+       atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
+
        uart_console_write(port, s, count, atmel_console_putchar);
 
        /*
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 770454e0dfa3..07390f8c3681 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1085,7 +1085,7 @@ static void mxs_auart_settermios(struct uart_port *u,
                                        AUART_LINECTRL_BAUD_DIV_MAX);
                baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN;
                baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max);
-               div = u->uartclk * 32 / baud;
+               div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud);
        }
 
        ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F);
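
The mxs-auart hunk swaps truncating division for DIV_ROUND_CLOSEST() when computing the baud divisor; whenever the clock does not divide evenly, truncation picks a smaller divisor and biases the programmed rate high. A quick standalone check with illustrative clock numbers:

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

int main(void)
{
	unsigned int uartclk = 24000000, baud = 115200;
	unsigned int trunc = uartclk * 32 / baud;
	unsigned int round = DIV_ROUND_CLOSEST(uartclk * 32, baud);

	/* trunc = 6666 (~115212 baud), round = 6667 (~115194 baud) */
	printf("trunc=%u round=%u\n", trunc, round);
	return 0;
}
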
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 479e223f9cff..f029aad67183 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
         */
        tbuf_size =  max_t(u16, sizeof(struct usb_hub_descriptor), wLength);
        tbuf = kzalloc(tbuf_size, GFP_KERNEL);
-       if (!tbuf)
-               return -ENOMEM;
+       if (!tbuf) {
+               status = -ENOMEM;
+               goto err_alloc;
+       }
 
        bufp = tbuf;
 
@@ -734,6 +736,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
        }
 
        kfree(tbuf);
+ err_alloc:
 
        /* any errors get returned through the urb completion */
        spin_lock_irq(&hcd_root_hub_lock);
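
The hcd fix turns an early return on allocation failure into a jump past the kfree(), so even that failure still flows through the common urb-completion code at the end of the function and the error is reported exactly once. The control-flow pattern in isolation (simplified, not the HCD code):

#include <stdlib.h>

int do_request(void)
{
	int status = 0;
	char *tbuf;

	tbuf = calloc(1, 64);
	if (!tbuf) {
		status = -1;
		goto err_alloc;	/* skip the body, not the completion */
	}

	/* ... build the reply in tbuf ... */

	free(tbuf);
 err_alloc:
	/* common completion path runs for success and failure alike */
	return status;
}
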
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1536aeb0abab..4e894d301c88 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2532,17 +2532,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
        }
 
        nfs4_stateid_copy(&stateid, &delegation->stateid);
-       if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+       if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) ||
+               !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
+                       &delegation->flags)) {
                rcu_read_unlock();
                nfs_finish_clear_delegation_stateid(state, &stateid);
                return;
        }
 
-       if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) {
-               rcu_read_unlock();
-               return;
-       }
-
        cred = get_rpccred(delegation->cred);
        rcu_read_unlock();
        status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 010aff5c5a79..536009e50387 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -790,6 +790,7 @@ nfserrno (int errno)
                { nfserr_serverfault, -ESERVERFAULT },
                { nfserr_serverfault, -ENFILE },
                { nfserr_io, -EUCLEAN },
+               { nfserr_perm, -ENOKEY },
        };
        int     i;
 
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index d346d42c54d1..33db69be4832 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -39,6 +39,7 @@
 #include "xfs_rmap_btree.h"
 #include "xfs_btree.h"
 #include "xfs_refcount_btree.h"
+#include "xfs_ialloc_btree.h"
 
 /*
  * Per-AG Block Reservations
@@ -200,22 +201,30 @@ __xfs_ag_resv_init(
        struct xfs_mount                *mp = pag->pag_mount;
        struct xfs_ag_resv              *resv;
        int                             error;
+       xfs_extlen_t                    reserved;
 
-       resv = xfs_perag_resv(pag, type);
        if (used > ask)
                ask = used;
-       resv->ar_asked = ask;
-       resv->ar_reserved = resv->ar_orig_reserved = ask - used;
-       mp->m_ag_max_usable -= ask;
+       reserved = ask - used;
 
-       trace_xfs_ag_resv_init(pag, type, ask);
-
-       error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true);
-       if (error)
+       error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
+       if (error) {
                trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
                                error, _RET_IP_);
+               xfs_warn(mp,
+"Per-AG reservation for AG %u failed.  Filesystem may run out of space.",
+                               pag->pag_agno);
+               return error;
+       }
 
-       return error;
+       mp->m_ag_max_usable -= ask;
+
+       resv = xfs_perag_resv(pag, type);
+       resv->ar_asked = ask;
+       resv->ar_reserved = resv->ar_orig_reserved = reserved;
+
+       trace_xfs_ag_resv_init(pag, type, ask);
+       return 0;
 }
 
 /* Create a per-AG block reservation. */
@@ -223,6 +232,8 @@ int
 xfs_ag_resv_init(
        struct xfs_perag                *pag)
 {
+       struct xfs_mount                *mp = pag->pag_mount;
+       xfs_agnumber_t                  agno = pag->pag_agno;
        xfs_extlen_t                    ask;
        xfs_extlen_t                    used;
        int                             error = 0;
@@ -231,23 +242,45 @@ xfs_ag_resv_init(
        if (pag->pag_meta_resv.ar_asked == 0) {
                ask = used = 0;
 
-               error = xfs_refcountbt_calc_reserves(pag->pag_mount,
-                               pag->pag_agno, &ask, &used);
+               error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
                if (error)
                        goto out;
 
-               error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
-                               ask, used);
+               error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
                if (error)
                        goto out;
+
+               error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
+                               ask, used);
+               if (error) {
+                       /*
+                        * Because we didn't have per-AG reservations when the
+                        * finobt feature was added we might not be able to
+                        * reserve all needed blocks.  Warn and fall back to the
+                        * old and potentially buggy code in that case, but
+                        * ensure we do have the reservation for the refcountbt.
+                        */
+                       ask = used = 0;
+
+                       mp->m_inotbt_nores = true;
+
+                       error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
+                                       &used);
+                       if (error)
+                               goto out;
+
+                       error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
+                                       ask, used);
+                       if (error)
+                               goto out;
+               }
        }
 
        /* Create the AGFL metadata reservation */
        if (pag->pag_agfl_resv.ar_asked == 0) {
                ask = used = 0;
 
-               error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno,
-                               &ask, &used);
+               error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
                if (error)
                        goto out;
 
@@ -256,9 +289,16 @@ xfs_ag_resv_init(
                        goto out;
        }
 
+#ifdef DEBUG
+       /* need to read in the AGF for the ASSERT below to work */
+       error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
+       if (error)
+               return error;
+
        ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
               xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
               pag->pagf_freeblks + pag->pagf_flcount);
+#endif
 out:
        return error;
 }
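
xfs_ag_resv_init() now reserves blocks for the refcountbt and the finobt together and, when a nearly full filesystem cannot cover both, retries with the refcountbt portion alone while recording the shortfall in m_inotbt_nores. The retry shape, abstracted into a standalone sketch (toy reservation function, assumed semantics):

#include <errno.h>
#include <stdbool.h>

static bool inotbt_nores;	/* mirrors mp->m_inotbt_nores */

/* Pretend reservation: fails with -ENOSPC when too much is asked for. */
static int reserve(unsigned int ask, unsigned int avail)
{
	return ask <= avail ? 0 : -ENOSPC;
}

static int resv_init(unsigned int refc_ask, unsigned int fino_ask,
		     unsigned int avail)
{
	if (reserve(refc_ask + fino_ask, avail) == 0)
		return 0;

	/* Fall back, but keep at least the refcountbt reservation. */
	inotbt_nores = true;
	return reserve(refc_ask, avail);
}
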
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f52fd63fce19..5a508b011e27 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -769,8 +769,8 @@ xfs_bmap_extents_to_btree(
                args.type = XFS_ALLOCTYPE_START_BNO;
                args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
        } else if (dfops->dop_low) {
-try_another_ag:
                args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
                args.fsbno = *firstblock;
        } else {
                args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -796,17 +796,19 @@ xfs_bmap_extents_to_btree(
        if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
            args.fsbno == NULLFSBLOCK &&
            args.type == XFS_ALLOCTYPE_NEAR_BNO) {
-               dfops->dop_low = true;
+               args.type = XFS_ALLOCTYPE_FIRST_AG;
                goto try_another_ag;
        }
+       if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
+               xfs_iroot_realloc(ip, -1, whichfork);
+               xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+               return -ENOSPC;
+       }
        /*
         * Allocation can't fail, the space was reserved.
         */
-       ASSERT(args.fsbno != NULLFSBLOCK);
        ASSERT(*firstblock == NULLFSBLOCK ||
-              args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
-              (dfops->dop_low &&
-               args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+              args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
        *firstblock = cur->bc_private.b.firstblock = args.fsbno;
        cur->bc_private.b.allocated++;
        ip->i_d.di_nblocks++;
@@ -1278,7 +1280,6 @@ xfs_bmap_read_extents(
        /* REFERENCED */
        xfs_extnum_t            room;   /* number of entries there's room for */
 
-       bno = NULLFSBLOCK;
        mp = ip->i_mount;
        ifp = XFS_IFORK_PTR(ip, whichfork);
        exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
@@ -1291,9 +1292,7 @@ xfs_bmap_read_extents(
        ASSERT(level > 0);
        pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
        bno = be64_to_cpu(*pp);
-       ASSERT(bno != NULLFSBLOCK);
-       ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
-       ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
        /*
         * Go down the tree until leaf level is reached, following the first
         * pointer (leftmost) at each level.
@@ -1955,6 +1954,7 @@ xfs_bmap_add_extent_delay_real(
                 */
                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                xfs_bmbt_set_startblock(ep, new->br_startblock);
+               xfs_bmbt_set_state(ep, new->br_state);
                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
 
                (*nextents)++;
@@ -2293,6 +2293,7 @@ STATIC int                                /* error */
 xfs_bmap_add_extent_unwritten_real(
        struct xfs_trans        *tp,
        xfs_inode_t             *ip,    /* incore inode pointer */
+       int                     whichfork,
        xfs_extnum_t            *idx,   /* extent number to update/insert */
        xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
        xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
@@ -2312,12 +2313,14 @@ xfs_bmap_add_extent_unwritten_real(
                                        /* left is 0, right is 1, prev is 2 */
        int                     rval=0; /* return value (logging flags) */
        int                     state = 0;/* state bits, accessed thru macros */
-       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_mount        *mp = ip->i_mount;
 
        *logflagsp = 0;
 
        cur = *curp;
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (whichfork == XFS_COW_FORK)
+               state |= BMAP_COWFORK;
 
        ASSERT(*idx >= 0);
        ASSERT(*idx <= xfs_iext_count(ifp));
@@ -2376,7 +2379,7 @@ xfs_bmap_add_extent_unwritten_real(
         * Don't set contiguous if the combined extent would be too large.
         * Also check for all-three-contiguous being too large.
         */
-       if (*idx < xfs_iext_count(&ip->i_df) - 1) {
+       if (*idx < xfs_iext_count(ifp) - 1) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
                if (isnullstartblock(RIGHT.br_startblock))
@@ -2416,7 +2419,8 @@ xfs_bmap_add_extent_unwritten_real(
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
                xfs_iext_remove(ip, *idx + 1, 2, state);
-               ip->i_d.di_nextents -= 2;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2459,7 +2463,8 @@ xfs_bmap_add_extent_unwritten_real(
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
                xfs_iext_remove(ip, *idx + 1, 1, state);
-               ip->i_d.di_nextents--;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2494,7 +2499,8 @@ xfs_bmap_add_extent_unwritten_real(
                xfs_bmbt_set_state(ep, newext);
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
                xfs_iext_remove(ip, *idx + 1, 1, state);
-               ip->i_d.di_nextents--;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2606,7 +2612,8 @@ xfs_bmap_add_extent_unwritten_real(
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
 
                xfs_iext_insert(ip, *idx, 1, new, state);
-               ip->i_d.di_nextents++;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2684,7 +2691,8 @@ xfs_bmap_add_extent_unwritten_real(
                ++*idx;
                xfs_iext_insert(ip, *idx, 1, new, state);
 
-               ip->i_d.di_nextents++;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2732,7 +2740,8 @@ xfs_bmap_add_extent_unwritten_real(
                ++*idx;
                xfs_iext_insert(ip, *idx, 2, &r[0], state);
 
-               ip->i_d.di_nextents += 2;
+               XFS_IFORK_NEXT_SET(ip, whichfork,
+                               XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
                if (cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2786,17 +2795,17 @@ xfs_bmap_add_extent_unwritten_real(
        }
 
        /* update reverse mappings */
-       error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+       error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
        if (error)
                goto done;
 
        /* convert to a btree if necessary */
-       if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
+       if (xfs_bmap_needs_btree(ip, whichfork)) {
                int     tmp_logflags;   /* partial log flag return val */
 
                ASSERT(cur == NULL);
                error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
-                               0, &tmp_logflags, XFS_DATA_FORK);
+                               0, &tmp_logflags, whichfork);
                *logflagsp |= tmp_logflags;
                if (error)
                        goto done;
@@ -2808,7 +2817,7 @@ xfs_bmap_add_extent_unwritten_real(
                *curp = cur;
        }
 
-       xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
+       xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
 done:
        *logflagsp |= rval;
        return error;
@@ -2900,7 +2909,8 @@ xfs_bmap_add_extent_hole_delay(
                oldlen = startblockval(left.br_startblock) +
                        startblockval(new->br_startblock) +
                        startblockval(right.br_startblock);
-               newlen = xfs_bmap_worst_indlen(ip, temp);
+               newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                                        oldlen);
                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
                        nullstartblock((int)newlen));
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2921,7 +2931,8 @@ xfs_bmap_add_extent_hole_delay(
                xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
                oldlen = startblockval(left.br_startblock) +
                        startblockval(new->br_startblock);
-               newlen = xfs_bmap_worst_indlen(ip, temp);
+               newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                                        oldlen);
                xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
                        nullstartblock((int)newlen));
                trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2937,7 +2948,8 @@ xfs_bmap_add_extent_hole_delay(
                temp = new->br_blockcount + right.br_blockcount;
                oldlen = startblockval(new->br_startblock) +
                        startblockval(right.br_startblock);
-               newlen = xfs_bmap_worst_indlen(ip, temp);
+               newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                                        oldlen);
                xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
                        new->br_startoff,
                        nullstartblock((int)newlen), temp, right.br_state);
@@ -3913,17 +3925,13 @@ xfs_bmap_btalloc(
                 * the first block that was allocated.
                 */
                ASSERT(*ap->firstblock == NULLFSBLOCK ||
-                      XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
-                      XFS_FSB_TO_AGNO(mp, args.fsbno) ||
-                      (ap->dfops->dop_low &&
-                       XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
-                       XFS_FSB_TO_AGNO(mp, args.fsbno)));
+                      XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
+                      XFS_FSB_TO_AGNO(mp, args.fsbno));
 
                ap->blkno = args.fsbno;
                if (*ap->firstblock == NULLFSBLOCK)
                        *ap->firstblock = args.fsbno;
-               ASSERT(nullfb || fb_agno == args.agno ||
-                      (ap->dfops->dop_low && fb_agno < args.agno));
+               ASSERT(nullfb || fb_agno <= args.agno);
                ap->length = args.len;
                if (!(ap->flags & XFS_BMAPI_COWFORK))
                        ap->ip->i_d.di_nblocks += args.len;
@@ -4249,6 +4257,19 @@ xfs_bmapi_read(
        return 0;
 }
 
+/*
+ * Add a delayed allocation extent to an inode. Blocks are reserved from the
+ * global pool and the extent inserted into the inode in-core extent tree.
+ *
+ * On entry, got refers to the first extent beyond the offset of the extent to
+ * allocate or eof is specified if no such extent exists. On return, got refers
+ * to the extent record that was inserted to the inode fork.
+ *
+ * Note that the allocated extent may have been merged with contiguous extents
+ * during insertion into the inode fork. Thus, got does not reflect the current
+ * state of the inode fork on return. If necessary, the caller can use lastx to
+ * look up the updated record in the inode fork.
+ */
 int
 xfs_bmapi_reserve_delalloc(
        struct xfs_inode        *ip,
@@ -4335,13 +4356,8 @@ xfs_bmapi_reserve_delalloc(
        got->br_startblock = nullstartblock(indlen);
        got->br_blockcount = alen;
        got->br_state = XFS_EXT_NORM;
-       xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
 
-       /*
-        * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
-        * might have merged it into one of the neighbouring ones.
-        */
-       xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+       xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
 
        /*
         * Tag the inode if blocks were preallocated. Note that COW fork
@@ -4353,10 +4369,6 @@ xfs_bmapi_reserve_delalloc(
        if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
                xfs_inode_set_cowblocks_tag(ip);
 
-       ASSERT(got->br_startoff <= aoff);
-       ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
-       ASSERT(isnullstartblock(got->br_startblock));
-       ASSERT(got->br_state == XFS_EXT_NORM);
        return 0;
 
 out_unreserve_blocks:
@@ -4461,10 +4473,16 @@ xfs_bmapi_allocate(
        bma->got.br_state = XFS_EXT_NORM;
 
        /*
-        * A wasdelay extent has been initialized, so shouldn't be flagged
-        * as unwritten.
+        * In the data fork, a wasdelay extent has been initialized, so
+        * shouldn't be flagged as unwritten.
+        *
+        * For the cow fork, however, we convert delalloc reservations
+        * (extents allocated for speculative preallocation) to
+        * allocated unwritten extents, and only convert the unwritten
+        * extents to real extents when we're about to write the data.
         */
-       if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
+       if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
+           (bma->flags & XFS_BMAPI_PREALLOC) &&
            xfs_sb_version_hasextflgbit(&mp->m_sb))
                bma->got.br_state = XFS_EXT_UNWRITTEN;
 
@@ -4515,8 +4533,6 @@ xfs_bmapi_convert_unwritten(
                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
                return 0;
 
-       ASSERT(whichfork != XFS_COW_FORK);
-
        /*
         * Modify (by adding) the state flag, if writing.
         */
@@ -4541,8 +4557,8 @@ xfs_bmapi_convert_unwritten(
                        return error;
        }
 
-       error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
-                       &bma->cur, mval, bma->firstblock, bma->dfops,
+       error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
+                       &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
                        &tmp_logflags);
        /*
         * Log the inode core unconditionally in the unwritten extent conversion
@@ -4551,8 +4567,12 @@ xfs_bmapi_convert_unwritten(
         * in the transaction for the sake of fsync(), even if nothing has
         * changed, because fsync() will not force the log for this transaction
         * unless it sees the inode pinned.
+        *
+        * Note: If we're only converting cow fork extents, there aren't
+        * any on-disk updates to make, so we don't need to log anything.
         */
-       bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
+       if (whichfork != XFS_COW_FORK)
+               bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
        if (error)
                return error;
 
@@ -4626,15 +4646,15 @@ xfs_bmapi_write(
        ASSERT(*nmap >= 1);
        ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
        ASSERT(!(flags & XFS_BMAPI_IGSTATE));
-       ASSERT(tp != NULL);
+       ASSERT(tp != NULL ||
+              (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
+                       (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
        ASSERT(len > 0);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
        ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
        ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
-       ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
-       ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
 
        /* zeroing is for currently only for data extents, not metadata */
        ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -4840,13 +4860,9 @@ xfs_bmapi_write(
        if (bma.cur) {
                if (!error) {
                        ASSERT(*firstblock == NULLFSBLOCK ||
-                              XFS_FSB_TO_AGNO(mp, *firstblock) ==
+                              XFS_FSB_TO_AGNO(mp, *firstblock) <=
                               XFS_FSB_TO_AGNO(mp,
-                                      bma.cur->bc_private.b.firstblock) ||
-                              (dfops->dop_low &&
-                               XFS_FSB_TO_AGNO(mp, *firstblock) <
-                               XFS_FSB_TO_AGNO(mp,
-                                       bma.cur->bc_private.b.firstblock)));
+                                      bma.cur->bc_private.b.firstblock));
                        *firstblock = bma.cur->bc_private.b.firstblock;
                }
                xfs_btree_del_cursor(bma.cur,
@@ -4881,34 +4897,59 @@ xfs_bmap_split_indlen(
        xfs_filblks_t                   len2 = *indlen2;
        xfs_filblks_t                   nres = len1 + len2; /* new total res. */
        xfs_filblks_t                   stolen = 0;
+       xfs_filblks_t                   resfactor;
 
        /*
         * Steal as many blocks as we can to try and satisfy the worst case
         * indlen for both new extents.
         */
-       while (nres > ores && avail) {
-               nres--;
-               avail--;
-               stolen++;
-       }
+       if (ores < nres && avail)
+               stolen = XFS_FILBLKS_MIN(nres - ores, avail);
+       ores += stolen;
+
+        /* nothing else to do if we've satisfied the new reservation */
+       if (ores >= nres)
+               return stolen;
+
+       /*
+        * We can't meet the total required reservation for the two extents.
+        * Calculate the percent of the overall shortage between both extents
+        * and apply this percentage to each of the requested indlen values.
+        * This distributes the shortage fairly and reduces the chances that one
+        * of the two extents is left with nothing when extents are repeatedly
+        * split.
+        */
+       resfactor = (ores * 100);
+       do_div(resfactor, nres);
+       len1 *= resfactor;
+       do_div(len1, 100);
+       len2 *= resfactor;
+       do_div(len2, 100);
+       ASSERT(len1 + len2 <= ores);
+       ASSERT(len1 < *indlen1 && len2 < *indlen2);
 
        /*
-        * The only blocks available are those reserved for the original
-        * extent and what we can steal from the extent being removed.
-        * If this still isn't enough to satisfy the combined
-        * requirements for the two new extents, skim blocks off of each
-        * of the new reservations until they match what is available.
+        * Hand out the remainder to each extent. If one of the two reservations
+        * is zero, we want to make sure that one gets a block first. The loop
+        * below starts with len1, so hand len2 a block right off the bat if it
+        * is zero.
         */
-       while (nres > ores) {
-               if (len1) {
-                       len1--;
-                       nres--;
+       ores -= (len1 + len2);
+       ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
+       if (ores && !len2 && *indlen2) {
+               len2++;
+               ores--;
+       }
+       while (ores) {
+               if (len1 < *indlen1) {
+                       len1++;
+                       ores--;
                }
-               if (nres == ores)
+               if (!ores)
                        break;
-               if (len2) {
-                       len2--;
-                       nres--;
+               if (len2 < *indlen2) {
+                       len2++;
+                       ores--;
                }
        }
 
@@ -5656,8 +5697,8 @@ __xfs_bunmapi(
                        }
                        del.br_state = XFS_EXT_UNWRITTEN;
                        error = xfs_bmap_add_extent_unwritten_real(tp, ip,
-                                       &lastx, &cur, &del, firstblock, dfops,
-                                       &logflags);
+                                       whichfork, &lastx, &cur, &del,
+                                       firstblock, dfops, &logflags);
                        if (error)
                                goto error0;
                        goto nodelete;
@@ -5714,8 +5755,9 @@ __xfs_bunmapi(
                                prev.br_state = XFS_EXT_UNWRITTEN;
                                lastx--;
                                error = xfs_bmap_add_extent_unwritten_real(tp,
-                                               ip, &lastx, &cur, &prev,
-                                               firstblock, dfops, &logflags);
+                                               ip, whichfork, &lastx, &cur,
+                                               &prev, firstblock, dfops,
+                                               &logflags);
                                if (error)
                                        goto error0;
                                goto nodelete;
@@ -5723,8 +5765,9 @@ __xfs_bunmapi(
                                ASSERT(del.br_state == XFS_EXT_NORM);
                                del.br_state = XFS_EXT_UNWRITTEN;
                                error = xfs_bmap_add_extent_unwritten_real(tp,
-                                               ip, &lastx, &cur, &del,
-                                               firstblock, dfops, &logflags);
+                                               ip, whichfork, &lastx, &cur,
+                                               &del, firstblock, dfops,
+                                               &logflags);
                                if (error)
                                        goto error0;
                                goto nodelete;
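
The rewritten xfs_bmap_split_indlen() spreads a reservation shortfall across the two new extents in proportion to what each asked for, instead of decrementing both in lockstep, so repeated splits no longer starve one side down to zero. The integer arithmetic, runnable on its own (simplified to unsigned ints):

#include <stdio.h>

/* Scale len1/len2 down to fit within ores, roughly keeping their ratio. */
static void split_indlen(unsigned int *len1, unsigned int *len2,
			 unsigned int ores)
{
	unsigned int nres = *len1 + *len2;
	unsigned int resfactor, l1, l2;

	if (ores >= nres)
		return;			/* fully satisfied */

	resfactor = ores * 100 / nres;
	l1 = *len1 * resfactor / 100;
	l2 = *len2 * resfactor / 100;

	/* hand back the blocks lost to rounding, one at a time */
	ores -= l1 + l2;
	while (ores) {
		if (l1 < *len1) { l1++; ores--; }
		if (ores && l2 < *len2) { l2++; ores--; }
	}
	*len1 = l1;
	*len2 = l2;
}

int main(void)
{
	unsigned int a = 7, b = 3;

	split_indlen(&a, &b, 5);	/* 7:3 request, 5 available -> 4 and 1 */
	printf("%u %u\n", a, b);
	return 0;
}
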
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index f76c1693ff01..5c3918678bb6 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -453,8 +453,8 @@ xfs_bmbt_alloc_block(
 
        if (args.fsbno == NULLFSBLOCK) {
                args.fsbno = be64_to_cpu(start->l);
-try_another_ag:
                args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
                /*
                 * Make sure there is sufficient room left in the AG to
                 * complete a full tree split for an extent insert.  If
@@ -494,8 +494,8 @@ xfs_bmbt_alloc_block(
        if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
            args.fsbno == NULLFSBLOCK &&
            args.type == XFS_ALLOCTYPE_NEAR_BNO) {
-               cur->bc_private.b.dfops->dop_low = true;
                args.fsbno = cur->bc_private.b.firstblock;
+               args.type = XFS_ALLOCTYPE_FIRST_AG;
                goto try_another_ag;
        }
 
@@ -512,7 +512,7 @@ xfs_bmbt_alloc_block(
                        goto error0;
                cur->bc_private.b.dfops->dop_low = true;
        }
-       if (args.fsbno == NULLFSBLOCK) {
+       if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
                XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
                *stat = 0;
                return 0;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 21e6a6ab6b9a..2849d3fa3d0b 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -810,7 +810,8 @@ xfs_btree_read_bufl(
        xfs_daddr_t             d;              /* real disk block address */
        int                     error;
 
-       ASSERT(fsbno != NULLFSBLOCK);
+       if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
+               return -EFSCORRUPTED;
        d = XFS_FSB_TO_DADDR(mp, fsbno);
        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
                                   mp->m_bsize, lock, &bp, ops);
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index c2b01d1c79ee..3b0fc1afada5 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -491,7 +491,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
 #define        XFS_FILBLKS_MAX(a,b)    max_t(xfs_filblks_t, (a), (b))
 
 #define        XFS_FSB_SANITY_CHECK(mp,fsb)    \
-       (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
+       (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
                XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
 
 /*
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index f2dc1a950c85..1bdf2888295b 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2633,7 +2633,7 @@ xfs_da_read_buf(
 /*
  * Readahead the dir/attr block.
  */
-xfs_daddr_t
+int
 xfs_da_reada_buf(
        struct xfs_inode        *dp,
        xfs_dablk_t             bno,
@@ -2664,7 +2664,5 @@ xfs_da_reada_buf(
        if (mapp != &map)
                kmem_free(mapp);
 
-       if (error)
-               return -1;
-       return mappedbno;
+       return error;
 }
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 98c75cbe6ac2..4e29cb6a3627 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
                               xfs_dablk_t bno, xfs_daddr_t mappedbno,
                               struct xfs_buf **bpp, int whichfork,
                               const struct xfs_buf_ops *ops);
-xfs_daddr_t    xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
+int    xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
                                xfs_daddr_t mapped_bno, int whichfork,
                                const struct xfs_buf_ops *ops);
 int    xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 75a557432d0f..bbd1238852b3 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
        .verify_write = xfs_dir3_free_write_verify,
 };
 
+/* Everything ok in the free block header? */
+static bool
+xfs_dir3_free_header_check(
+       struct xfs_inode        *dp,
+       xfs_dablk_t             fbno,
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = dp->i_mount;
+       unsigned int            firstdb;
+       int                     maxbests;
+
+       maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo);
+       firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) -
+                  xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
+                       maxbests;
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
+
+               if (be32_to_cpu(hdr3->firstdb) != firstdb)
+                       return false;
+               if (be32_to_cpu(hdr3->nvalid) > maxbests)
+                       return false;
+               if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
+                       return false;
+       } else {
+               struct xfs_dir2_free_hdr *hdr = bp->b_addr;
+
+               if (be32_to_cpu(hdr->firstdb) != firstdb)
+                       return false;
+               if (be32_to_cpu(hdr->nvalid) > maxbests)
+                       return false;
+               if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
+                       return false;
+       }
+       return true;
+}
 
 static int
 __xfs_dir3_free_read(
@@ -168,11 +204,22 @@ __xfs_dir3_free_read(
 
        err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
                                XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
+       if (err || !*bpp)
+               return err;
+
+       /* Check things that we can't do in the verifier. */
+       if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) {
+               xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
+               xfs_verifier_error(*bpp);
+               xfs_trans_brelse(tp, *bpp);
+               return -EFSCORRUPTED;
+       }
 
        /* try read returns without an error or *bpp if it lands in a hole */
-       if (!err && tp && *bpp)
+       if (tp)
                xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
-       return err;
+
+       return 0;
 }
 
 int
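
The header check above relies on free blocks forming a dense array inside the FREE segment of the directory address space: the Nth free block must cover "best free" slots starting at exactly N * maxbests, and nused can never exceed nvalid. A standalone sketch of that invariant (illustrative helper, not code from this patch):

    /* Expected firstdb for the Nth free block, maxbests slots per block. */
    static unsigned int expected_firstdb(unsigned int n, unsigned int maxbests)
    {
            return n * maxbests;    /* e.g. n = 2, maxbests = 100 -> 200 */
    }

A header that disagrees was corrupted or transplanted from another block, so __xfs_dir3_free_read() fails with -EFSCORRUPTED before the contents are used. The check sits outside the buffer verifier because it needs the inode's directory geometry, which the verifier callbacks do not see.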
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index d45c03779dae..a2818f6e8598 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment(
        struct xfs_mount        *mp)
 {
        if (xfs_sb_version_hasalign(&mp->m_sb) &&
-           mp->m_sb.sb_inoalignmt >=
-                       XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+           mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
                return mp->m_sb.sb_inoalignmt;
        return 1;
 }
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 6c6b95947e71..b9c351ff0422 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -82,11 +82,12 @@ xfs_finobt_set_root(
 }
 
 STATIC int
-xfs_inobt_alloc_block(
+__xfs_inobt_alloc_block(
        struct xfs_btree_cur    *cur,
        union xfs_btree_ptr     *start,
        union xfs_btree_ptr     *new,
-       int                     *stat)
+       int                     *stat,
+       enum xfs_ag_resv_type   resv)
 {
        xfs_alloc_arg_t         args;           /* block allocation args */
        int                     error;          /* error return value */
@@ -103,6 +104,7 @@ xfs_inobt_alloc_block(
        args.maxlen = 1;
        args.prod = 1;
        args.type = XFS_ALLOCTYPE_NEAR_BNO;
+       args.resv = resv;
 
        error = xfs_alloc_vextent(&args);
        if (error) {
@@ -123,6 +125,27 @@ xfs_inobt_alloc_block(
 }
 
 STATIC int
+xfs_inobt_alloc_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *start,
+       union xfs_btree_ptr     *new,
+       int                     *stat)
+{
+       return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE);
+}
+
+STATIC int
+xfs_finobt_alloc_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *start,
+       union xfs_btree_ptr     *new,
+       int                     *stat)
+{
+       return __xfs_inobt_alloc_block(cur, start, new, stat,
+                       XFS_AG_RESV_METADATA);
+}
+
+STATIC int
 xfs_inobt_free_block(
        struct xfs_btree_cur    *cur,
        struct xfs_buf          *bp)
@@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
 
        .dup_cursor             = xfs_inobt_dup_cursor,
        .set_root               = xfs_finobt_set_root,
-       .alloc_block            = xfs_inobt_alloc_block,
+       .alloc_block            = xfs_finobt_alloc_block,
        .free_block             = xfs_inobt_free_block,
        .get_minrecs            = xfs_inobt_get_minrecs,
        .get_maxrecs            = xfs_inobt_get_maxrecs,
@@ -478,3 +501,64 @@ xfs_inobt_rec_check_count(
        return 0;
 }
 #endif /* DEBUG */
+
+static xfs_extlen_t
+xfs_inobt_max_size(
+       struct xfs_mount        *mp)
+{
+       /* Bail out if we're uninitialized, which can happen in mkfs. */
+       if (mp->m_inobt_mxr[0] == 0)
+               return 0;
+
+       return xfs_btree_calc_size(mp, mp->m_inobt_mnr,
+               (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
+                               XFS_INODES_PER_CHUNK);
+}
+
+static int
+xfs_inobt_count_blocks(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_btnum_t             btnum,
+       xfs_extlen_t            *tree_blocks)
+{
+       struct xfs_buf          *agbp;
+       struct xfs_btree_cur    *cur;
+       int                     error;
+
+       error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
+       if (error)
+               return error;
+
+       cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum);
+       error = xfs_btree_count_blocks(cur, tree_blocks);
+       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       xfs_buf_relse(agbp);
+
+       return error;
+}
+
+/*
+ * Figure out how many blocks to reserve and how many are used by this btree.
+ */
+int
+xfs_finobt_calc_reserves(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_extlen_t            *ask,
+       xfs_extlen_t            *used)
+{
+       xfs_extlen_t            tree_len = 0;
+       int                     error;
+
+       if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+               return 0;
+
+       error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len);
+       if (error)
+               return error;
+
+       *ask += xfs_inobt_max_size(mp);
+       *used += tree_len;
+       return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index bd88453217ce..aa81e2e63f3f 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
 #define xfs_inobt_rec_check_count(mp, rec)     0
 #endif /* DEBUG */
 
+int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno,
+               xfs_extlen_t *ask, xfs_extlen_t *used);
+
 #endif /* __XFS_IALLOC_BTREE_H__ */
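
The new helper pairs with the finobt allocation changes above: the worst-case free inode btree size is computed per AG and set aside under XFS_AG_RESV_METADATA, so freeing inodes normally never needs the global reserve pool (compare the xfs_inactive_ifree() hunk below). A hedged sketch of the intended mount-time consumer; the wiring into the per-AG reservation pool is an assumption here, since these hunks only add the calculation:

    static int example_finobt_resv(struct xfs_mount *mp, xfs_agnumber_t agno)
    {
            xfs_extlen_t    ask = 0, used = 0;
            int             error;

            error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
            if (error)
                    return error;

            /*
             * "ask" is the worst-case finobt size for this AG and "used"
             * the blocks it already occupies; the metadata reservation
             * pool needs to hold back the difference.
             */
            return 0;
    }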
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 222e103356c6..25c1e078aef6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -26,6 +26,7 @@
 #include "xfs_inode.h"
 #include "xfs_trans.h"
 #include "xfs_inode_item.h"
+#include "xfs_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
@@ -429,11 +430,13 @@ xfs_iformat_btree(
        /* REFERENCED */
        int                     nrecs;
        int                     size;
+       int                     level;
 
        ifp = XFS_IFORK_PTR(ip, whichfork);
        dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
        size = XFS_BMAP_BROOT_SPACE(mp, dfp);
        nrecs = be16_to_cpu(dfp->bb_numrecs);
+       level = be16_to_cpu(dfp->bb_level);
 
        /*
         * blow out if -- fork has less extents than can fit in
@@ -446,7 +449,8 @@ xfs_iformat_btree(
                                        XFS_IFORK_MAXEXT(ip, whichfork) ||
                     XFS_BMDR_SPACE_CALC(nrecs) >
                                        XFS_DFORK_SIZE(dip, mp, whichfork) ||
-                    XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+                    XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
+                    level == 0 || level > XFS_BTREE_MAXLEVELS) {
                xfs_warn(mp, "corrupt inode %Lu (btree).",
                                        (unsigned long long) ip->i_ino);
                XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
@@ -497,15 +501,14 @@ xfs_iread_extents(
         * We know that the size is valid (it's checked in iformat_btree)
         */
        ifp->if_bytes = ifp->if_real_bytes = 0;
-       ifp->if_flags |= XFS_IFEXTENTS;
        xfs_iext_add(ifp, 0, nextents);
        error = xfs_bmap_read_extents(tp, ip, whichfork);
        if (error) {
                xfs_iext_destroy(ifp);
-               ifp->if_flags &= ~XFS_IFEXTENTS;
                return error;
        }
        xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
+       ifp->if_flags |= XFS_IFEXTENTS;
        return 0;
 }
 /*
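
Deferring the XFS_IFEXTENTS flag until after a successful read matters because readers use that flag to decide whether the in-core extent list is complete; the usual gate looks roughly like this (a sketch of the existing idiom, not new code in this patch):

    /* Only walk the in-core extent list once it is marked complete. */
    if (!(ifp->if_flags & XFS_IFEXTENTS)) {
            error = xfs_iread_extents(tp, ip, whichfork);
            if (error)
                    return error;
    }

Setting the flag before xfs_bmap_read_extents() had succeeded let such readers trust a partially populated list when the read failed partway.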
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 06763f5cc701..0457abe4118a 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -279,54 +279,49 @@ xfs_end_io(
        struct xfs_ioend        *ioend =
                container_of(work, struct xfs_ioend, io_work);
        struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+       xfs_off_t               offset = ioend->io_offset;
+       size_t                  size = ioend->io_size;
        int                     error = ioend->io_bio->bi_error;
 
        /*
-        * Set an error if the mount has shut down and proceed with end I/O
-        * processing so it can perform whatever cleanups are necessary.
+        * Just clean up the in-memory structures if the fs has been shut down.
         */
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                error = -EIO;
+               goto done;
+       }
 
        /*
-        * For a CoW extent, we need to move the mapping from the CoW fork
-        * to the data fork.  If instead an error happened, just dump the
-        * new blocks.
+        * Clean up any COW blocks on an I/O error.
         */
-       if (ioend->io_type == XFS_IO_COW) {
-               if (error)
-                       goto done;
-               if (ioend->io_bio->bi_error) {
-                       error = xfs_reflink_cancel_cow_range(ip,
-                                       ioend->io_offset, ioend->io_size);
-                       goto done;
+       if (unlikely(error)) {
+               switch (ioend->io_type) {
+               case XFS_IO_COW:
+                       xfs_reflink_cancel_cow_range(ip, offset, size, true);
+                       break;
                }
-               error = xfs_reflink_end_cow(ip, ioend->io_offset,
-                               ioend->io_size);
-               if (error)
-                       goto done;
+
+               goto done;
        }
 
        /*
-        * For unwritten extents we need to issue transactions to convert a
-        * range to normal written extens after the data I/O has finished.
-        * Detecting and handling completion IO errors is done individually
-        * for each case as different cleanup operations need to be performed
-        * on error.
+        * Success:  commit the COW or unwritten blocks if needed.
         */
-       if (ioend->io_type == XFS_IO_UNWRITTEN) {
-               if (error)
-                       goto done;
-               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
-                                                 ioend->io_size);
-       } else if (ioend->io_append_trans) {
-               error = xfs_setfilesize_ioend(ioend, error);
-       } else {
-               ASSERT(!xfs_ioend_is_append(ioend) ||
-                      ioend->io_type == XFS_IO_COW);
+       switch (ioend->io_type) {
+       case XFS_IO_COW:
+               error = xfs_reflink_end_cow(ip, offset, size);
+               break;
+       case XFS_IO_UNWRITTEN:
+               error = xfs_iomap_write_unwritten(ip, offset, size);
+               break;
+       default:
+               ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+               break;
        }
 
 done:
+       if (ioend->io_append_trans)
+               error = xfs_setfilesize_ioend(ioend, error);
        xfs_destroy_ioend(ioend, error);
 }
 
@@ -486,6 +481,12 @@ xfs_submit_ioend(
        struct xfs_ioend        *ioend,
        int                     status)
 {
+       /* Convert CoW extents to regular */
+       if (!status && ioend->io_type == XFS_IO_COW) {
+               status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
+                               ioend->io_offset, ioend->io_size);
+       }
+
        /* Reserve log space if we might write beyond the on-disk inode size. */
        if (!status &&
            ioend->io_type != XFS_IO_UNWRITTEN &&
@@ -1257,44 +1258,6 @@ xfs_map_trim_size(
        bh_result->b_size = mapping_size;
 }
 
-/* Bounce unaligned directio writes to the page cache. */
-static int
-xfs_bounce_unaligned_dio_write(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           offset_fsb,
-       struct xfs_bmbt_irec    *imap)
-{
-       struct xfs_bmbt_irec    irec;
-       xfs_fileoff_t           delta;
-       bool                    shared;
-       bool                    x;
-       int                     error;
-
-       irec = *imap;
-       if (offset_fsb > irec.br_startoff) {
-               delta = offset_fsb - irec.br_startoff;
-               irec.br_blockcount -= delta;
-               irec.br_startblock += delta;
-               irec.br_startoff = offset_fsb;
-       }
-       error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
-       if (error)
-               return error;
-
-       /*
-        * We're here because we're trying to do a directio write to a
-        * region that isn't aligned to a filesystem block.  If any part
-        * of the extent is shared, fall back to buffered mode to handle
-        * the RMW.  This is done by returning -EREMCHG ("remote addr
-        * changed"), which is caught further up the call stack.
-        */
-       if (shared) {
-               trace_xfs_reflink_bounce_dio_write(ip, imap);
-               return -EREMCHG;
-       }
-       return 0;
-}
-
 STATIC int
 __xfs_get_blocks(
        struct inode            *inode,
@@ -1432,13 +1395,6 @@ __xfs_get_blocks(
        if (imap.br_startblock != HOLESTARTBLOCK &&
            imap.br_startblock != DELAYSTARTBLOCK &&
            (create || !ISUNWRITTEN(&imap))) {
-               if (create && direct && !is_cow) {
-                       error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
-                                       &imap);
-                       if (error)
-                               return error;
-               }
-
                xfs_map_buffer(inode, bh_result, &imap, offset);
                if (ISUNWRITTEN(&imap))
                        set_buffer_unwritten(bh_result);
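
Taken together, the xfs_aops.c hunks restructure write completion around one exit path: on error only cleanup runs (cancelling CoW reservations), on success the CoW remap or unwritten conversion commits, and the on-disk size update happens exactly once at the done: label for every ioend type. The submit side is the other half of the contract; just before the bio goes out, the range actually being written is flipped from unwritten to real in the CoW fork:

    /* From the xfs_submit_ioend() hunk above: convert before submission. */
    if (!status && ioend->io_type == XFS_IO_COW) {
            status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
                            ioend->io_offset, ioend->io_size);
    }

This is what later lets xfs_reflink_end_cow() skip anything still unwritten: only blocks that really reached disk get remapped into the data fork.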
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index efb8ccd6bbf2..5c395e485170 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
  */
 int
 xfs_free_eofblocks(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *ip,
-       bool            need_iolock)
+       struct xfs_inode        *ip)
 {
-       xfs_trans_t     *tp;
-       int             error;
-       xfs_fileoff_t   end_fsb;
-       xfs_fileoff_t   last_fsb;
-       xfs_filblks_t   map_len;
-       int             nimaps;
-       xfs_bmbt_irec_t imap;
+       struct xfs_trans        *tp;
+       int                     error;
+       xfs_fileoff_t           end_fsb;
+       xfs_fileoff_t           last_fsb;
+       xfs_filblks_t           map_len;
+       int                     nimaps;
+       struct xfs_bmbt_irec    imap;
+       struct xfs_mount        *mp = ip->i_mount;
+
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 
        /*
         * Figure out if there are any blocks beyond the end
@@ -944,6 +945,10 @@ xfs_free_eofblocks(
        error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
+       /*
+        * If there are blocks after the end of file, truncate the file to its
+        * current size to free them up.
+        */
        if (!error && (nimaps != 0) &&
            (imap.br_startblock != HOLESTARTBLOCK ||
             ip->i_delayed_blks)) {
@@ -954,22 +959,13 @@ xfs_free_eofblocks(
                if (error)
                        return error;
 
-               /*
-                * There are blocks after the end of file.
-                * Free them up now by truncating the file to
-                * its current size.
-                */
-               if (need_iolock) {
-                       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
-                               return -EAGAIN;
-               }
+               /* wait on dio to ensure i_size has settled */
+               inode_dio_wait(VFS_I(ip));
 
                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
                                &tp);
                if (error) {
                        ASSERT(XFS_FORCED_SHUTDOWN(mp));
-                       if (need_iolock)
-                               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                        return error;
                }
 
@@ -997,8 +993,6 @@ xfs_free_eofblocks(
                }
 
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               if (need_iolock)
-                       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
        }
        return error;
 }
@@ -1393,10 +1387,16 @@ xfs_shift_file_space(
        xfs_fileoff_t           stop_fsb;
        xfs_fileoff_t           next_fsb;
        xfs_fileoff_t           shift_fsb;
+       uint                    resblks;
 
        ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
 
        if (direction == SHIFT_LEFT) {
+               /*
+                * Reserve blocks to cover potential extent merges after left
+                * shift operations.
+                */
+               resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
                next_fsb = XFS_B_TO_FSB(mp, offset + len);
                stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
        } else {
@@ -1404,6 +1404,7 @@ xfs_shift_file_space(
                 * If right shift, delegate the work of initialization of
                 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
                 */
+               resblks = 0;
                next_fsb = NULLFSBLOCK;
                stop_fsb = XFS_B_TO_FSB(mp, offset);
        }
@@ -1415,7 +1416,7 @@ xfs_shift_file_space(
         * into the accessible region of the file.
         */
        if (xfs_can_free_eofblocks(ip, true)) {
-               error = xfs_free_eofblocks(mp, ip, false);
+               error = xfs_free_eofblocks(ip);
                if (error)
                        return error;
        }
@@ -1445,21 +1446,14 @@ xfs_shift_file_space(
        }
 
        while (!error && !done) {
-               /*
-                * We would need to reserve permanent block for transaction.
-                * This will come into picture when after shifting extent into
-                * hole we found that adjacent extents can be merged which
-                * may lead to freeing of a block during record update.
-                */
-               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
-                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+                                       &tp);
                if (error)
                        break;
 
                xfs_ilock(ip, XFS_ILOCK_EXCL);
                error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
-                               ip->i_gdquot, ip->i_pdquot,
-                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+                               ip->i_gdquot, ip->i_pdquot, resblks, 0,
                                XFS_QMOPT_RES_REGBLKS);
                if (error)
                        goto out_trans_cancel;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 68a621a8e0c0..f1005393785c 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,8 +63,7 @@ int   xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
 
 /* EOF block manipulation functions */
 bool   xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
-int    xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
-                          bool need_iolock);
+int    xfs_free_eofblocks(struct xfs_inode *ip);
 
 int    xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
                         struct xfs_swapext *sx);
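
The signature change encodes a new locking contract: xfs_free_eofblocks() now asserts XFS_IOLOCK_EXCL rather than optionally taking it, so each caller owns the lock explicitly. The pattern used by the updated callers in this patch (xfs_inactive() shown; xfs_release() uses the nowait variant and simply skips the trim on contention):

    if (xfs_can_free_eofblocks(ip, true)) {
            xfs_ilock(ip, XFS_IOLOCK_EXCL);
            error = xfs_free_eofblocks(ip);
            xfs_iunlock(ip, XFS_IOLOCK_EXCL);
    }

Pushing the lock out to the callers removes the old -EAGAIN special case and lets the function wait for direct I/O with the iolock already held.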
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2975cb2319f4..0306168af332 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks(
         */
        bp->b_last_error = 0;
        bp->b_retries = 0;
+       bp->b_first_retry_time = 0;
 
        xfs_buf_do_callbacks(bp);
        bp->b_fspriv = NULL;
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 162dc186cf04..29c2f997aedf 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -45,18 +45,7 @@ xfs_extent_busy_insert(
        struct rb_node          **rbp;
        struct rb_node          *parent = NULL;
 
-       new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
-       if (!new) {
-               /*
-                * No Memory!  Since it is now not possible to track the free
-                * block, make this a synchronous transaction to insure that
-                * the block is not reused before this transaction commits.
-                */
-               trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
-               xfs_trans_set_sync(tp);
-               return;
-       }
-
+       new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
        new->agno = agno;
        new->bno = bno;
        new->length = len;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9a5d64b5f35a..1209ad29e902 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -554,6 +554,15 @@ xfs_file_dio_aio_write(
        if ((iocb->ki_pos & mp->m_blockmask) ||
            ((iocb->ki_pos + count) & mp->m_blockmask)) {
                unaligned_io = 1;
+
+               /*
+                * We can't properly handle unaligned direct I/O to reflink
+                * files yet, as we can't unshare a partial block.
+                */
+               if (xfs_is_reflink_inode(ip)) {
+                       trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
+                       return -EREMCHG;
+               }
                iolock = XFS_IOLOCK_EXCL;
        } else {
                iolock = XFS_IOLOCK_SHARED;
@@ -675,8 +684,10 @@ xfs_file_buffered_aio_write(
        struct xfs_inode        *ip = XFS_I(inode);
        ssize_t                 ret;
        int                     enospc = 0;
-       int                     iolock = XFS_IOLOCK_EXCL;
+       int                     iolock;
 
+write_retry:
+       iolock = XFS_IOLOCK_EXCL;
        xfs_rw_ilock(ip, iolock);
 
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
@@ -686,7 +697,6 @@ xfs_file_buffered_aio_write(
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = inode_to_bdi(inode);
 
-write_retry:
        trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
        ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
        if (likely(ret >= 0))
@@ -702,18 +712,21 @@ xfs_file_buffered_aio_write(
         * running at the same time.
         */
        if (ret == -EDQUOT && !enospc) {
+               xfs_rw_iunlock(ip, iolock);
                enospc = xfs_inode_free_quota_eofblocks(ip);
                if (enospc)
                        goto write_retry;
                enospc = xfs_inode_free_quota_cowblocks(ip);
                if (enospc)
                        goto write_retry;
+               iolock = 0;
        } else if (ret == -ENOSPC && !enospc) {
                struct xfs_eofblocks eofb = {0};
 
                enospc = 1;
                xfs_flush_inodes(ip->i_mount);
-               eofb.eof_scan_owner = ip->i_ino; /* for locking */
+
+               xfs_rw_iunlock(ip, iolock);
                eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
                xfs_icache_free_eofblocks(ip->i_mount, &eofb);
                goto write_retry;
@@ -721,7 +734,8 @@ xfs_file_buffered_aio_write(
 
        current->backing_dev_info = NULL;
 out:
-       xfs_rw_iunlock(ip, iolock);
+       if (iolock)
+               xfs_rw_iunlock(ip, iolock);
        return ret;
 }
 
@@ -987,9 +1001,9 @@ xfs_dir_open(
         */
        mode = xfs_ilock_data_map_shared(ip);
        if (ip->i_d.di_nextents > 0)
-               xfs_dir3_data_readahead(ip, 0, -1);
+               error = xfs_dir3_data_readahead(ip, 0, -1);
        xfs_iunlock(ip, mode);
-       return 0;
+       return error;
 }
 
 STATIC int
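
The buffered-write rework moves the write_retry label above the lock acquisition, so every retry re-enters with a fresh iolock, and the EDQUOT/ENOSPC paths drop the lock before running the eofblocks scans; those scans now take XFS_IOLOCK_EXCL per inode themselves (see the xfs_icache.c hunks below) and would deadlock against a holder. Reduced to its shape, based on the hunks above:

    write_retry:
            iolock = XFS_IOLOCK_EXCL;
            xfs_rw_ilock(ip, iolock);
            ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
            if (ret == -EDQUOT && !enospc) {
                    xfs_rw_iunlock(ip, iolock);
                    enospc = xfs_inode_free_quota_eofblocks(ip);
                    if (enospc)
                            goto write_retry;   /* fresh lock on re-entry */
                    iolock = 0;                 /* already dropped, no retry */
            }

The iolock = 0 assignment is what keeps the common exit path from unlocking a lock that was already released.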
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 29cc9886a3cb..3fb1f3fb8efe 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1324,13 +1324,10 @@ xfs_inode_free_eofblocks(
        int                     flags,
        void                    *args)
 {
-       int ret;
+       int ret = 0;
        struct xfs_eofblocks *eofb = args;
-       bool need_iolock = true;
        int match;
 
-       ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
        if (!xfs_can_free_eofblocks(ip, false)) {
                /* inode could be preallocated or append-only */
                trace_xfs_inode_free_eofblocks_invalid(ip);
@@ -1358,21 +1355,19 @@ xfs_inode_free_eofblocks(
                if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
                    XFS_ISIZE(ip) < eofb->eof_min_file_size)
                        return 0;
-
-               /*
-                * A scan owner implies we already hold the iolock. Skip it in
-                * xfs_free_eofblocks() to avoid deadlock. This also eliminates
-                * the possibility of EAGAIN being returned.
-                */
-               if (eofb->eof_scan_owner == ip->i_ino)
-                       need_iolock = false;
        }
 
-       ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
-
-       /* don't revisit the inode if we're not waiting */
-       if (ret == -EAGAIN && !(flags & SYNC_WAIT))
-               ret = 0;
+       /*
+        * If the caller is waiting, return -EAGAIN to keep the background
+        * scanner moving and revisit the inode in a subsequent pass.
+        */
+       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+               if (flags & SYNC_WAIT)
+                       ret = -EAGAIN;
+               return ret;
+       }
+       ret = xfs_free_eofblocks(ip);
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 
        return ret;
 }
@@ -1419,15 +1414,10 @@ __xfs_inode_free_quota_eofblocks(
        struct xfs_eofblocks eofb = {0};
        struct xfs_dquot *dq;
 
-       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-
        /*
-        * Set the scan owner to avoid a potential livelock. Otherwise, the scan
-        * can repeatedly trylock on the inode we're currently processing. We
-        * run a sync scan to increase effectiveness and use the union filter to
+        * Run a sync scan to increase effectiveness and use the union filter to
         * cover all applicable quotas in a single scan.
         */
-       eofb.eof_scan_owner = ip->i_ino;
        eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
 
        if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
@@ -1579,12 +1569,9 @@ xfs_inode_free_cowblocks(
 {
        int ret;
        struct xfs_eofblocks *eofb = args;
-       bool need_iolock = true;
        int match;
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 
-       ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
        /*
         * Just clear the tag if we have an empty cow fork or none at all. It's
         * possible the inode was fully unshared since it was originally tagged.
@@ -1617,28 +1604,16 @@ xfs_inode_free_cowblocks(
                if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
                    XFS_ISIZE(ip) < eofb->eof_min_file_size)
                        return 0;
-
-               /*
-                * A scan owner implies we already hold the iolock. Skip it in
-                * xfs_free_eofblocks() to avoid deadlock. This also eliminates
-                * the possibility of EAGAIN being returned.
-                */
-               if (eofb->eof_scan_owner == ip->i_ino)
-                       need_iolock = false;
        }
 
        /* Free the CoW blocks */
-       if (need_iolock) {
-               xfs_ilock(ip, XFS_IOLOCK_EXCL);
-               xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
-       }
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
 
-       ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+       ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
 
-       if (need_iolock) {
-               xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       }
+       xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 
        return ret;
 }
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index a1e02f4708ab..8a7c849b4dea 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,7 +27,6 @@ struct xfs_eofblocks {
        kgid_t          eof_gid;
        prid_t          eof_prid;
        __u64           eof_min_file_size;
-       xfs_ino_t       eof_scan_owner;
 };
 
 #define SYNC_WAIT              0x0001  /* wait for i/o to complete */
@@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user(
        dst->eof_flags = src->eof_flags;
        dst->eof_prid = src->eof_prid;
        dst->eof_min_file_size = src->eof_min_file_size;
-       dst->eof_scan_owner = NULLFSINO;
 
        dst->eof_uid = INVALID_UID;
        if (src->eof_flags & XFS_EOF_FLAGS_UID) {
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 512ff13ed66a..e50636c9a89c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1624,7 +1624,7 @@ xfs_itruncate_extents(
 
        /* Remove all pending CoW reservations. */
        error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
-                       last_block);
+                       last_block, true);
        if (error)
                goto out;
 
@@ -1701,32 +1701,34 @@ xfs_release(
        if (xfs_can_free_eofblocks(ip, false)) {
 
                /*
+                * Check if the inode is being opened, written and closed
+                * frequently and we have delayed allocation blocks outstanding
+                * (e.g. streaming writes from the NFS server), truncating the
+                * blocks past EOF will cause fragmentation to occur.
+                *
+                * In this case don't do the truncation, but we have to be
+                * careful how we detect this case. Blocks beyond EOF show up as
+                * i_delayed_blks even when the inode is clean, so we need to
+                * truncate them away first before checking for a dirty release.
+                * Hence on the first dirty close we will still remove the
+                * speculative allocation, but after that we will leave it in
+                * place.
+                */
+               if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+                       return 0;
+               /*
                 * If we can't get the iolock just skip truncating the blocks
                 * past EOF because we could deadlock with the mmap_sem
-                * otherwise.  We'll get another chance to drop them once the
+                * otherwise. We'll get another chance to drop them once the
                 * last reference to the inode is dropped, so we'll never leak
                 * blocks permanently.
-                *
-                * Further, check if the inode is being opened, written and
-                * closed frequently and we have delayed allocation blocks
-                * outstanding (e.g. streaming writes from the NFS server),
-                * truncating the blocks past EOF will cause fragmentation to
-                * occur.
-                *
-                * In this case don't do the truncation, either, but we have to
-                * be careful how we detect this case. Blocks beyond EOF show
-                * up as i_delayed_blks even when the inode is clean, so we
-                * need to truncate them away first before checking for a dirty
-                * release. Hence on the first dirty close we will still remove
-                * the speculative allocation, but after that we will leave it
-                * in place.
                 */
-               if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
-                       return 0;
-
-               error = xfs_free_eofblocks(mp, ip, true);
-               if (error && error != -EAGAIN)
-                       return error;
+               if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+                       error = xfs_free_eofblocks(ip);
+                       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+                       if (error)
+                               return error;
+               }
 
                /* delalloc blocks after truncation means it really is dirty */
                if (ip->i_delayed_blks)
@@ -1801,22 +1803,23 @@ xfs_inactive_ifree(
        int                     error;
 
        /*
-        * The ifree transaction might need to allocate blocks for record
-        * insertion to the finobt. We don't want to fail here at ENOSPC, so
-        * allow ifree to dip into the reserved block pool if necessary.
-        *
-        * Freeing large sets of inodes generally means freeing inode chunks,
-        * directory and file data blocks, so this should be relatively safe.
-        * Only under severe circumstances should it be possible to free enough
-        * inodes to exhaust the reserve block pool via finobt expansion while
-        * at the same time not creating free space in the filesystem.
+        * We try to use a per-AG reservation for any block needed by the finobt
+        * tree, but as the finobt feature predates the per-AG reservation
+        * support a degraded file system might not have enough space for the
+        * reservation at mount time.  In that case try to dip into the reserved
+        * pool and pray.
         *
         * Send a warning if the reservation does happen to fail, as the inode
         * now remains allocated and sits on the unlinked list until the fs is
         * repaired.
         */
-       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
-                       XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
+       if (unlikely(mp->m_inotbt_nores)) {
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
+                               XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
+                               &tp);
+       } else {
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
+       }
        if (error) {
                if (error == -ENOSPC) {
                        xfs_warn_ratelimited(mp,
@@ -1912,8 +1915,11 @@ xfs_inactive(
                 * cache. Post-eof blocks must be freed, lest we end up with
                 * broken free space accounting.
                 */
-               if (xfs_can_free_eofblocks(ip, true))
-                       xfs_free_eofblocks(mp, ip, false);
+               if (xfs_can_free_eofblocks(ip, true)) {
+                       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+                       xfs_free_eofblocks(ip);
+                       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+               }
 
                return;
        }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index e8889614cec3..360562484e7b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -637,6 +637,11 @@ xfs_file_iomap_begin_delay(
                goto out_unlock;
        }
 
+       /*
+        * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+        * them out if the write happens to fail.
+        */
+       iomap->flags = IOMAP_F_NEW;
        trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
        if (isnullstartblock(got.br_startblock))
@@ -685,7 +690,7 @@ xfs_iomap_write_allocate(
        int             nres;
 
        if (whichfork == XFS_COW_FORK)
-               flags |= XFS_BMAPI_COWFORK;
+               flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
 
        /*
         * Make sure that the dquots are there.
@@ -1061,7 +1066,8 @@ xfs_file_iomap_end_delalloc(
        struct xfs_inode        *ip,
        loff_t                  offset,
        loff_t                  length,
-       ssize_t                 written)
+       ssize_t                 written,
+       struct iomap            *iomap)
 {
        struct xfs_mount        *mp = ip->i_mount;
        xfs_fileoff_t           start_fsb;
@@ -1080,14 +1086,14 @@ xfs_file_iomap_end_delalloc(
        end_fsb = XFS_B_TO_FSB(mp, offset + length);
 
        /*
-        * Trim back delalloc blocks if we didn't manage to write the whole
-        * range reserved.
+        * Trim delalloc blocks if they were allocated by this write and we
+        * didn't manage to write the whole range.
         *
         * We don't need to care about racing delalloc as we hold i_mutex
         * across the reserve/allocate/unreserve calls. If there are delalloc
         * blocks in the range, they are ours.
         */
-       if (start_fsb < end_fsb) {
+       if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
                truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
                                         XFS_FSB_TO_B(mp, end_fsb) - 1);
 
@@ -1117,7 +1123,7 @@ xfs_file_iomap_end(
 {
        if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
                return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
-                               length, written);
+                               length, written, iomap);
        return 0;
 }
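
The IOMAP_F_NEW flag closes a subtle hole: ->iomap_end used to punch out any delalloc blocks left in the unwritten tail of a short write, even blocks reserved by an earlier write whose data was still dirty in the page cache. Now only ranges that this ->iomap_begin call itself allocated are trimmed back:

    /* Punch only what this write reserved (from the hunk above): */
    if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb)
            truncate_pagecache_range(VFS_I(ip),
                            XFS_FSB_TO_B(mp, start_fsb),
                            XFS_FSB_TO_B(mp, end_fsb) - 1);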
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b341f10cf481..13796f212f98 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -502,8 +502,7 @@ STATIC void
 xfs_set_inoalignment(xfs_mount_t *mp)
 {
        if (xfs_sb_version_hasalign(&mp->m_sb) &&
-           mp->m_sb.sb_inoalignmt >=
-           XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+               mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
                mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
        else
                mp->m_inoalign_mask = 0;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 819b80b15bfb..1bf878b0492c 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -140,6 +140,7 @@ typedef struct xfs_mount {
        int                     m_fixedfsid[2]; /* unchanged for life of FS */
        uint                    m_dmevmask;     /* DMI events for this FS */
        __uint64_t              m_flags;        /* global mount flags */
+       bool                    m_inotbt_nores; /* no per-AG finobt resv. */
        int                     m_ialloc_inos;  /* inodes in inode allocation */
        int                     m_ialloc_blks;  /* blocks in inode allocation */
        int                     m_ialloc_min_blks;/* min blocks in sparse inode
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 4d3f74e3c5e1..2252f163c38f 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -82,11 +82,22 @@
  * mappings are a reservation against the free space in the filesystem;
  * adjacent mappings can also be combined into fewer larger mappings.
  *
+ * As an optimization, the CoW extent size hint (cowextsz) creates
+ * outsized aligned delalloc reservations in the hope of landing out of
+ * order nearby CoW writes in a single extent on disk, thereby reducing
+ * fragmentation and improving future performance.
+ *
+ * D: --RRRRRRSSSRRRRRRRR--- (data fork)
+ * C: ------DDDDDDD--------- (CoW fork)
+ *
  * When dirty pages are being written out (typically in writepage), the
- * delalloc reservations are converted into real mappings by allocating
- * blocks and replacing the delalloc mapping with real ones.  A delalloc
- * mapping can be replaced by several real ones if the free space is
- * fragmented.
+ * delalloc reservations are converted into unwritten mappings by
+ * allocating blocks and replacing the delalloc mapping with real ones.
+ * A delalloc mapping can be replaced by several unwritten ones if the
+ * free space is fragmented.
+ *
+ * D: --RRRRRRSSSRRRRRRRR---
+ * C: ------UUUUUUU---------
  *
  * We want to adapt the delalloc mechanism for copy-on-write, since the
  * write paths are similar.  The first two steps (creating the reservation
@@ -101,13 +112,29 @@
  * Block-aligned directio writes will use the same mechanism as buffered
  * writes.
  *
+ * Just prior to submitting the actual disk write requests, we convert
+ * the extents representing the range of the file actually being written
+ * (as opposed to extra pieces created for the cowextsize hint) to real
+ * extents.  This will become important in the next step:
+ *
+ * D: --RRRRRRSSSRRRRRRRR---
+ * C: ------UUrrUUU---------
+ *
  * CoW remapping must be done after the data block write completes,
  * because we don't want to destroy the old data fork map until we're sure
  * the new block has been written.  Since the new mappings are kept in a
  * separate fork, we can simply iterate these mappings to find the ones
  * that cover the file blocks that we just CoW'd.  For each extent, simply
  * unmap the corresponding range in the data fork, map the new range into
- * the data fork, and remove the extent from the CoW fork.
+ * the data fork, and remove the extent from the CoW fork.  Because of
+ * the presence of the cowextsize hint, however, we must be careful
+ * only to remap the blocks that we've actually written out --  we must
+ * never remap delalloc reservations nor CoW staging blocks that have
+ * yet to be written.  This corresponds exactly to the real extents in
+ * the CoW fork:
+ *
+ * D: --RRRRRRrrSRRRRRRRR---
+ * C: ------UU--UUU---------
  *
  * Since the remapping operation can be applied to an arbitrary file
  * range, we record the need for the remap step as a flag in the ioend
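
(The diagram letters are not given a key in this hunk; from the surrounding text they read as R for written real blocks in the data fork, S for shared blocks awaiting copy-on-write, D for delalloc CoW reservations, U for unwritten CoW staging extents, and lowercase r for CoW blocks already written and converted to real. This key is inferred from context rather than stated in the source.)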
@@ -296,6 +323,65 @@ xfs_reflink_reserve_cow(
        return 0;
 }
 
+/* Convert part of an unwritten CoW extent to a real one. */
+STATIC int
+xfs_reflink_convert_cow_extent(
+       struct xfs_inode                *ip,
+       struct xfs_bmbt_irec            *imap,
+       xfs_fileoff_t                   offset_fsb,
+       xfs_filblks_t                   count_fsb,
+       struct xfs_defer_ops            *dfops)
+{
+       struct xfs_bmbt_irec            irec = *imap;
+       xfs_fsblock_t                   first_block;
+       int                             nimaps = 1;
+
+       if (imap->br_state == XFS_EXT_NORM)
+               return 0;
+
+       xfs_trim_extent(&irec, offset_fsb, count_fsb);
+       trace_xfs_reflink_convert_cow(ip, &irec);
+       if (irec.br_blockcount == 0)
+               return 0;
+       return xfs_bmapi_write(NULL, ip, irec.br_startoff, irec.br_blockcount,
+                       XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
+                       0, &irec, &nimaps, dfops);
+}
+
+/* Convert all of the unwritten CoW extents in a file's range to real ones. */
+int
+xfs_reflink_convert_cow(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       xfs_off_t               count)
+{
+       struct xfs_bmbt_irec    got;
+       struct xfs_defer_ops    dfops;
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, offset + count);
+       xfs_extnum_t            idx;
+       bool                    found;
+       int                     error = 0;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       /* Convert all the extents to real from unwritten. */
+       for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
+            found && got.br_startoff < end_fsb;
+            found = xfs_iext_get_extent(ifp, ++idx, &got)) {
+               error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
+                               end_fsb - offset_fsb, &dfops);
+               if (error)
+                       break;
+       }
+
+       /* Finish up. */
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+}
+
 /* Allocate all CoW reservations covering a range of blocks in a file. */
 static int
 __xfs_reflink_allocate_cow(
@@ -328,6 +414,7 @@ __xfs_reflink_allocate_cow(
                goto out_unlock;
        ASSERT(nimaps == 1);
 
+       /* Make sure there's a CoW reservation for it. */
        error = xfs_reflink_reserve_cow(ip, &imap, &shared);
        if (error)
                goto out_trans_cancel;
@@ -337,14 +424,16 @@ __xfs_reflink_allocate_cow(
                goto out_trans_cancel;
        }
 
+       /* Allocate the entire reservation as unwritten blocks. */
        xfs_trans_ijoin(tp, ip, 0);
        error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
-                       XFS_BMAPI_COWFORK, &first_block,
+                       XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
                        XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
                        &imap, &nimaps, &dfops);
        if (error)
                goto out_trans_cancel;
 
+       /* Finish up. */
        error = xfs_defer_finish(&tp, &dfops, NULL);
        if (error)
                goto out_trans_cancel;
@@ -389,11 +478,12 @@ xfs_reflink_allocate_cow_range(
                if (error) {
                        trace_xfs_reflink_allocate_cow_range_error(ip, error,
                                        _RET_IP_);
-                       break;
+                       return error;
                }
        }
 
-       return error;
+       /* Convert the CoW extents to regular. */
+       return xfs_reflink_convert_cow(ip, offset, count);
 }
 
 /*
@@ -481,14 +571,18 @@ xfs_reflink_trim_irec_to_next_cow(
 }
 
 /*
- * Cancel all pending CoW reservations for some block range of an inode.
+ * Cancel CoW reservations for some block range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
  */
 int
 xfs_reflink_cancel_cow_blocks(
        struct xfs_inode                *ip,
        struct xfs_trans                **tpp,
        xfs_fileoff_t                   offset_fsb,
-       xfs_fileoff_t                   end_fsb)
+       xfs_fileoff_t                   end_fsb,
+       bool                            cancel_real)
 {
        struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
        struct xfs_bmbt_irec            got, prev, del;
@@ -515,7 +609,7 @@ xfs_reflink_cancel_cow_blocks(
                                        &idx, &got, &del);
                        if (error)
                                break;
-               } else {
+               } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
                        xfs_trans_ijoin(*tpp, ip, 0);
                        xfs_defer_init(&dfops, &firstfsb);
 
@@ -558,13 +652,17 @@ xfs_reflink_cancel_cow_blocks(
 }
 
 /*
- * Cancel all pending CoW reservations for some byte range of an inode.
+ * Cancel CoW reservations for some byte range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
  */
 int
 xfs_reflink_cancel_cow_range(
        struct xfs_inode        *ip,
        xfs_off_t               offset,
-       xfs_off_t               count)
+       xfs_off_t               count,
+       bool                    cancel_real)
 {
        struct xfs_trans        *tp;
        xfs_fileoff_t           offset_fsb;
@@ -590,7 +688,8 @@ xfs_reflink_cancel_cow_range(
        xfs_trans_ijoin(tp, ip, 0);
 
        /* Scrape out the old CoW reservations */
-       error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
+       error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
+                       cancel_real);
        if (error)
                goto out_cancel;
 
@@ -669,6 +768,16 @@ xfs_reflink_end_cow(
 
                ASSERT(!isnullstartblock(got.br_startblock));
 
+               /*
+                * Don't remap unwritten extents; these are
+                * speculatively preallocated CoW extents that have been
+                * allocated but have not yet been involved in a write.
+                */
+               if (got.br_state == XFS_EXT_UNWRITTEN) {
+                       idx--;
+                       goto next_extent;
+               }
+
                /* Unmap the old blocks in the data fork. */
                xfs_defer_init(&dfops, &firstfsb);
                rlen = del.br_blockcount;
@@ -885,13 +994,14 @@ STATIC int
 xfs_reflink_update_dest(
        struct xfs_inode        *dest,
        xfs_off_t               newlen,
-       xfs_extlen_t            cowextsize)
+       xfs_extlen_t            cowextsize,
+       bool                    is_dedupe)
 {
        struct xfs_mount        *mp = dest->i_mount;
        struct xfs_trans        *tp;
        int                     error;
 
-       if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+       if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
                return 0;
 
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -912,6 +1022,10 @@ xfs_reflink_update_dest(
                dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
        }
 
+       if (!is_dedupe) {
+               xfs_trans_ichgtime(tp, dest,
+                                  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       }
        xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
 
        error = xfs_trans_commit(tp);
@@ -1428,7 +1542,8 @@ xfs_reflink_remap_range(
            !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
                cowextsize = src->i_d.di_cowextsize;
 
-       ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
+       ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+                       is_dedupe);
 
 out_unlock:
        xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
@@ -1580,7 +1695,7 @@ xfs_reflink_clear_inode_flag(
         * We didn't find any shared blocks so turn off the reflink flag.
         * First, get rid of any leftover CoW mappings.
         */
-       error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
+       error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
        if (error)
                return error;
 
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 97ea9b487884..a57966fc7ddd 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -30,6 +30,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
                struct xfs_bmbt_irec *imap, bool *shared);
 extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
                xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
+               xfs_off_t count);
 extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
                struct xfs_bmbt_irec *imap, bool *need_alloc);
 extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
@@ -37,9 +39,9 @@ extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
 
 extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
                struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
-               xfs_fileoff_t end_fsb);
+               xfs_fileoff_t end_fsb, bool cancel_real);
 extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
-               xfs_off_t count);
+               xfs_off_t count, bool cancel_real);
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
                xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ade4691e3f74..dbbd3f1fd2b7 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -948,7 +948,7 @@ xfs_fs_destroy_inode(
        XFS_STATS_INC(ip->i_mount, vn_remove);
 
        if (xfs_is_reflink_inode(ip)) {
-               error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+               error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
                if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
                        xfs_warn(ip->i_mount,
 "Error %d while evicting CoW blocks for inode %llu.",
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 0907752be62d..828f383df121 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3183,6 +3183,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
                __field(xfs_fileoff_t, lblk)
                __field(xfs_extlen_t, len)
                __field(xfs_fsblock_t, pblk)
+               __field(int, state)
        ),
        TP_fast_assign(
                __entry->dev = VFS_I(ip)->i_sb->s_dev;
@@ -3190,13 +3191,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
                __entry->lblk = irec->br_startoff;
                __entry->len = irec->br_blockcount;
                __entry->pblk = irec->br_startblock;
+               __entry->state = irec->br_state;
        ),
-       TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu",
+       TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->lblk,
                  __entry->len,
-                 __entry->pblk)
+                 __entry->pblk,
+                 __entry->state)
 );
 #define DEFINE_INODE_IREC_EVENT(name) \
 DEFINE_EVENT(xfs_inode_irec_class, name, \
@@ -3345,11 +3348,12 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
+DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
 
 DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
 DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
 
-DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
+DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
 DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01c0b9cc3915..8c58db2c09c6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
                    int len, void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                            int len, struct kvm_io_device *dev);
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-                             struct kvm_io_device *dev);
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+                              struct kvm_io_device *dev);
 struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
                                         gpa_t addr);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 254698856b8f..8b35bdbdc214 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -739,6 +739,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
        return false;
 }
 
+static inline void mem_cgroup_update_page_stat(struct page *page,
+                                              enum mem_cgroup_stat_index idx,
+                                              int nr)
+{
+}
+
 static inline void mem_cgroup_inc_page_stat(struct page *page,
                                            enum mem_cgroup_stat_index idx)
 {
diff --git a/kernel/padata.c b/kernel/padata.c
index 7848f0566403..b4a3c0ae649b 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -190,19 +190,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 
        reorder = &next_queue->reorder;
 
+       spin_lock(&reorder->lock);
        if (!list_empty(&reorder->list)) {
                padata = list_entry(reorder->list.next,
                                    struct padata_priv, list);
 
-               spin_lock(&reorder->lock);
                list_del_init(&padata->list);
                atomic_dec(&pd->reorder_objects);
-               spin_unlock(&reorder->lock);
 
                pd->processed++;
 
+               spin_unlock(&reorder->lock);
                goto out;
        }
+       spin_unlock(&reorder->lock);
 
        if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
                padata = ERR_PTR(-ENODATA);
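
The padata fix widens the reorder queue's critical section so the emptiness test, the list removal, and the pd->processed++ bookkeeping form one atomic unit; with the test outside the lock, two contexts draining the same reorder queue could both see it non-empty and race on the removal and the counters. The general shape of the fix, as a generic sketch (minimal assumed types, not padata code):

    struct queue { spinlock_t lock; struct list_head list; long processed; };
    struct item  { struct list_head list; };

    static struct item *pop_and_count(struct queue *q)
    {
            struct item *it = NULL;

            spin_lock(&q->lock);
            if (!list_empty(&q->list)) {            /* test ...           */
                    it = list_first_entry(&q->list, struct item, list);
                    list_del_init(&it->list);       /* ... and act under  */
                    q->processed++;                 /* the same lock      */
            }
            spin_unlock(&q->lock);
            return it;
    }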
diff --git a/lib/syscall.c b/lib/syscall.c
index 63239e097b13..a72cd0996230 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -11,6 +11,7 @@ static int collect_syscall(struct task_struct *target, long *callno,
 
        if (!try_get_task_stack(target)) {
                /* Task has no stack, so the task isn't in a syscall. */
+               *sp = *pc = 0;
                *callno = -1;
                return 0;
        }
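
The lib/syscall.c one-liner makes the early "no stack" exit define the *sp and *pc out-parameters instead of leaving them untouched, so no caller can consume uninitialized stack values. The rule it enforces: every return path of a function with out-parameters must assign them. A small sketch with hypothetical names:

#include <stdio.h>

static int collect(int have_data, long *val, long *aux)
{
        if (!have_data) {
                *val = *aux = 0;        /* defined even on the early exit */
                return 0;
        }
        *val = 42;
        *aux = 7;
        return 0;
}

int main(void)
{
        long v, a;

        collect(0, &v, &a);
        printf("%ld %ld\n", v, a);      /* always 0 0, never garbage */
        return 0;
}
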
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b6adedbafaf5..65c36acf8a6b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4471,6 +4471,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 {
        struct page *page = NULL;
        spinlock_t *ptl;
+       pte_t pte;
 retry:
        ptl = pmd_lockptr(mm, pmd);
        spin_lock(ptl);
@@ -4480,12 +4481,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
         */
        if (!pmd_huge(*pmd))
                goto out;
-       if (pmd_present(*pmd)) {
+       pte = huge_ptep_get((pte_t *)pmd);
+       if (pte_present(pte)) {
                page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
                if (flags & FOLL_GET)
                        get_page(page);
        } else {
-               if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+               if (is_hugetlb_entry_migration(pte)) {
                        spin_unlock(ptl);
                        __migration_entry_wait(mm, (pte_t *)pmd, ptl);
                        goto retry;
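
The hugetlb hunk snapshots the entry once through huge_ptep_get() and derives both the present test and the migration-entry test from that single read; dereferencing *pmd separately for each check leaves a window where a concurrent update makes the two tests disagree. A purely illustrative userspace sketch of the read-once pattern with C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned long shared_entry;

static bool entry_present(unsigned long e)   { return e & 1; }
static bool entry_migrating(unsigned long e) { return e & 2; }

static void inspect(void)
{
        /* one load; every decision below uses the same snapshot */
        unsigned long e = atomic_load(&shared_entry);

        if (entry_present(e))
                printf("present\n");
        else if (entry_migrating(e))
                printf("migration entry\n");
        else
                printf("none\n");
}

int main(void)
{
        atomic_store(&shared_entry, 2);
        inspect();                      /* prints "migration entry" */
        return 0;
}
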
diff --git a/mm/rmap.c b/mm/rmap.c
index 1ef36404e7b2..cd37c1c7e21b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1295,7 +1295,7 @@ void page_add_file_rmap(struct page *page, bool compound)
                        goto out;
        }
        __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
-       mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+       mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr);
 out:
        unlock_page_memcg(page);
 }
@@ -1335,7 +1335,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
         * pte lock(a spinlock) is held, which implies preemption disabled.
         */
        __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
-       mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+       mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr);
 
        if (unlikely(PageMlocked(page)))
                clear_page_mlock(page);
diff --git a/mm/workingset.c b/mm/workingset.c
index 33f6f4db32fd..4c4f05655e6e 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -492,7 +492,7 @@ static int __init workingset_init(void)
        pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
               timestamp_bits, max_order, bucket_order);
 
-       ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
+       ret = __list_lru_init(&workingset_shadow_nodes, true, &shadow_nodes_key);
        if (ret)
                goto err;
        ret = register_shrinker(&workingset_shadow_shrinker);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2efb335deada..25a30be862e9 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -7,6 +7,7 @@
 #include <linux/kthread.h>
 #include <linux/net.h>
 #include <linux/nsproxy.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
 {
        struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
        struct socket *sock;
+       unsigned int noio_flag;
        int ret;
 
        BUG_ON(con->sock);
+
+       /* sock_create_kern() allocates with GFP_KERNEL */
+       noio_flag = memalloc_noio_save();
        ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
                               SOCK_STREAM, IPPROTO_TCP, &sock);
+       memalloc_noio_restore(noio_flag);
        if (ret)
                return ret;
        sock->sk->sk_allocation = GFP_NOFS;
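
sock_create_kern() allocates with GFP_KERNEL internally, and when the ceph messenger reconnects under memory pressure such an allocation can recurse back into filesystem writeback; bracketing the call with memalloc_noio_save()/memalloc_noio_restore() masks I/O for everything inside. The pair is the usual save/set/call/restore guard; a generic userspace analogue with a thread-local flag, names illustrative:

#include <stdbool.h>
#include <stdio.h>

static _Thread_local bool no_io;

static bool no_io_save(void)
{
        bool old = no_io;

        no_io = true;           /* constrain everything below this point */
        return old;
}

static void no_io_restore(bool old)
{
        no_io = old;            /* restore, don't clear: nests safely */
}

static void allocate(void)
{
        printf(no_io ? "alloc without I/O\n" : "alloc may do I/O\n");
}

int main(void)
{
        bool flag = no_io_save();

        allocate();             /* runs with the constraint applied */
        no_io_restore(flag);
        return 0;
}
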
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 3f4efcb85df5..3490d21ab9e7 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize)
        /* NOTE: overflow flag is not cleared */
        spin_unlock_irqrestore(&f->lock, flags);
 
+       /* close the old pool and wait until all users are gone */
+       snd_seq_pool_mark_closing(oldpool);
+       snd_use_lock_sync(&f->use_lock);
+
        /* release cells in old pool */
        for (cell = oldhead; cell; cell = next) {
                next = cell->next;
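
The sequencer FIFO fix orders teardown as: mark the old pool closing, wait for every outstanding user via snd_use_lock_sync(), and only then free the cells, closing a window where a concurrent reader could touch freed memory. A compact pthread sketch of that mark/drain/free ordering, under assumed names:

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct pool {
        pthread_mutex_t lock;
        pthread_cond_t  idle;
        int             users;
        bool            closing;
};

static void pool_put(struct pool *p)
{
        pthread_mutex_lock(&p->lock);
        if (--p->users == 0)
                pthread_cond_signal(&p->idle);
        pthread_mutex_unlock(&p->lock);
}

static void pool_destroy(struct pool *p)
{
        pthread_mutex_lock(&p->lock);
        p->closing = true;              /* step 1: refuse new users */
        while (p->users > 0)            /* step 2: drain current users */
                pthread_cond_wait(&p->idle, &p->lock);
        pthread_mutex_unlock(&p->lock);
        free(p);                        /* step 3: freeing is now safe */
}

int main(void)
{
        struct pool *p = malloc(sizeof(*p));

        pthread_mutex_init(&p->lock, NULL);
        pthread_cond_init(&p->idle, NULL);
        p->users = 1;                   /* pretend one reader is active */
        p->closing = false;
        pool_put(p);                    /* the reader finishes ... */
        pool_destroy(p);                /* ... so destroy does not block */
        return 0;
}
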
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 112caa2d3c14..bb1aad39d987 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4846,6 +4846,7 @@ enum {
        ALC292_FIXUP_DISABLE_AAMIX,
        ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
        ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+       ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
        ALC275_FIXUP_DELL_XPS,
        ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
        ALC293_FIXUP_LENOVO_SPK_NOISE,
@@ -5446,6 +5447,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MODE
        },
+       [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE
+       },
        [ALC275_FIXUP_DELL_XPS] = {
                .type = HDA_FIXUP_VERBS,
                .v.verbs = (const struct hda_verb[]) {
@@ -5518,7 +5528,7 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc298_fixup_speaker_volume,
                .chained = true,
-               .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+               .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
        },
        [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
                .type = HDA_FIXUP_PINS,
diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
index 89ac5f5a93eb..7ae46c2647d4 100644
--- a/sound/soc/atmel/atmel-classd.c
+++ b/sound/soc/atmel/atmel-classd.c
@@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai,
 }
 
 #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8)
-#define CLASSD_ACLK_RATE_12M288_MPY_8  (12228 * 1000 * 8)
+#define CLASSD_ACLK_RATE_12M288_MPY_8  (12288 * 1000 * 8)
 
 static struct {
        int rate;
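
The Atmel macro fix corrects a transposed digit: the 12.288 MHz rate family was being computed from 12,228 kHz. Spelled out, 12288 * 1000 * 8 = 98,304,000 Hz, exactly 8 x 12.288 MHz, while the typo yielded 97,824,000 Hz. A two-printf check:

#include <stdio.h>

int main(void)
{
        printf("typo:  %d\n", 12228 * 1000 * 8);   /* 97824000 */
        printf("fixed: %d\n", 12288 * 1000 * 8);   /* 98304000 = 8 x 12.288 MHz */
        return 0;
}
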
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index b5b1934d8550..bef8a4546c12 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -448,7 +448,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w)
                        if (bc->set_params != SKL_PARAM_INIT)
                                continue;
 
-                       mconfig->formats_config.caps = (u32 *)&bc->params;
+                       mconfig->formats_config.caps = (u32 *)bc->params;
                        mconfig->formats_config.caps_size = bc->size;
 
                        break;
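
The Skylake topology fix drops a stray ampersand: if params is a pointer member, (u32 *)&bc->params casts the address of the pointer field itself, handing the DSP the bytes of the pointer rather than the capability blob, while (u32 *)bc->params passes the buffer it points to. A minimal demonstration of the difference, assuming a pointer member as the fix implies:

#include <stdio.h>

struct blob {
        unsigned int *params;   /* assumed pointer member, as in the fix */
};

int main(void)
{
        unsigned int data[2] = { 0xdead, 0xbeef };
        struct blob b = { data };

        printf("&b.params : %p (address of the field itself)\n",
               (void *)&b.params);
        printf(" b.params : %p (the payload buffer)\n", (void *)b.params);
        printf("first word: 0x%x\n", b.params[0]);
        return 0;
}
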
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a29786dd9522..4d28a9ddbee0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
                        continue;
 
                kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-               kvm->buses[bus_idx]->ioeventfd_count--;
+               if (kvm->buses[bus_idx])
+                       kvm->buses[bus_idx]->ioeventfd_count--;
                ioeventfd_release(p);
                ret = 0;
                break;
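
The kvm_main.c changes that follow make a torn-down bus survivable: kvm_destroy_vm() and the failed-shrink path in kvm_io_bus_unregister_dev() can leave kvm->buses[] entries NULL, so every reader must NULL-check the pointer it fetched under SRCU before using it. A userspace stand-in for that load-then-check discipline, with a C11 atomic pointer in place of SRCU and illustrative names:

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

struct bus { int dev_count; };

static _Atomic(struct bus *) busp;

static int bus_write(void)
{
        struct bus *bus = atomic_load(&busp);   /* one protected load */

        if (!bus)
                return -ENOMEM;                 /* bus already torn down */
        printf("writing via bus with %d devices\n", bus->dev_count);
        return 0;
}

int main(void)
{
        static struct bus b = { .dev_count = 3 };

        atomic_store(&busp, &b);
        bus_write();
        atomic_store(&busp, NULL);              /* teardown */
        printf("after teardown: %d\n", bus_write());
        return 0;
}
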
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7f9ee2929cfe..f4c6d4f6d2e8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -720,8 +720,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
        list_del(&kvm->vm_list);
        spin_unlock(&kvm_lock);
        kvm_free_irq_routing(kvm);
-       for (i = 0; i < KVM_NR_BUSES; i++)
-               kvm_io_bus_destroy(kvm->buses[i]);
+       for (i = 0; i < KVM_NR_BUSES; i++) {
+               if (kvm->buses[i])
+                       kvm_io_bus_destroy(kvm->buses[i]);
+               kvm->buses[i] = NULL;
+       }
        kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
        mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -3463,6 +3466,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
        };
 
        bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       if (!bus)
+               return -ENOMEM;
        r = __kvm_io_bus_write(vcpu, bus, &range, val);
        return r < 0 ? r : 0;
 }
@@ -3480,6 +3485,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
        };
 
        bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       if (!bus)
+               return -ENOMEM;
 
        /* First try the device referenced by cookie. */
        if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3530,6 +3537,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
        };
 
        bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       if (!bus)
+               return -ENOMEM;
        r = __kvm_io_bus_read(vcpu, bus, &range, val);
        return r < 0 ? r : 0;
 }
@@ -3542,6 +3551,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
        struct kvm_io_bus *new_bus, *bus;
 
        bus = kvm->buses[bus_idx];
+       if (!bus)
+               return -ENOMEM;
+
        /* exclude ioeventfd which is limited by maximum fd */
        if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
                return -ENOSPC;
@@ -3561,37 +3573,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 }
 
 /* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-                             struct kvm_io_device *dev)
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+                              struct kvm_io_device *dev)
 {
-       int i, r;
+       int i;
        struct kvm_io_bus *new_bus, *bus;
 
        bus = kvm->buses[bus_idx];
-       r = -ENOENT;
+       if (!bus)
+               return;
+
        for (i = 0; i < bus->dev_count; i++)
                if (bus->range[i].dev == dev) {
-                       r = 0;
                        break;
                }
 
-       if (r)
-               return r;
+       if (i == bus->dev_count)
+               return;
 
        new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
                          sizeof(struct kvm_io_range)), GFP_KERNEL);
-       if (!new_bus)
-               return -ENOMEM;
+       if (!new_bus)  {
+               pr_err("kvm: failed to shrink bus, removing it completely\n");
+               goto broken;
+       }
 
        memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
        new_bus->dev_count--;
        memcpy(new_bus->range + i, bus->range + i + 1,
               (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
 
+broken:
        rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
        synchronize_srcu_expedited(&kvm->srcu);
        kfree(bus);
-       return r;
+       return;
 }
 
 struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -3604,6 +3620,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
        srcu_idx = srcu_read_lock(&kvm->srcu);
 
        bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+       if (!bus)
+               goto out_unlock;
 
        dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
        if (dev_idx < 0)
