Re: Linux 3.14.47

Greg KH Mon, 06 Jul 2015 08:32:35 -0700
diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index 6cd63a9010fb..bc6d61773ee2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2344,7 +2344,8 @@ should be created before this ioctl is invoked.
 
 Possible features:
        - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
-         Depends on KVM_CAP_ARM_PSCI.
+         Depends on KVM_CAP_ARM_PSCI.  If not set, the CPU will be powered on
+         and execute guest code when KVM_RUN is called.
        - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
          Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
 
diff --git a/Makefile b/Makefile
index def39fdd9df4..f9041e6d4d19 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 14
-SUBLEVEL = 46
+SUBLEVEL = 47
 EXTRAVERSION =
 NAME = Remembering Coco
 
diff --git a/arch/arm/include/asm/kvm_emulate.h 
b/arch/arm/include/asm/kvm_emulate.h
index 0fa90c962ac8..853e2becad18 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
        return 1;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 0cbdb8ed71cf..9f7923193cda 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -47,6 +47,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -78,17 +79,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
        flush_pmd_entry(pte);
 }
 
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
-       unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
-       if (hsr_ec == HSR_EC_IABT)
-               return false;
-       else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
-               return false;
-       else
-               return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd)
 {
        clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index df6e75e47ae0..2e74a617147d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -220,6 +220,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 
unsigned int id)
        int err;
        struct kvm_vcpu *vcpu;
 
+       if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+               err = -EBUSY;
+               goto out;
+       }
+
        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        if (!vcpu) {
                err = -ENOMEM;
@@ -427,9 +432,9 @@ static void update_vttbr(struct kvm *kvm)
 
        /* update vttbr to be used with the new vmid */
        pgd_phys = virt_to_phys(kvm->arch.pgd);
+       BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
        vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-       kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-       kvm->arch.vttbr |= vmid;
+       kvm->arch.vttbr = pgd_phys | vmid;
 
        spin_unlock(&kvm_vmid_lock);
 }
@@ -676,10 +681,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu 
*vcpu,
                return ret;
 
        /*
+        * Ensure a rebooted VM will fault in RAM pages and detect if the
+        * guest MMU is turned off and flush the caches as needed.
+        */
+       if (vcpu->arch.has_run_once)
+               stage2_unmap_vm(vcpu->kvm);
+
+       vcpu_reset_hcr(vcpu);
+
+       /*
         * Handle the "start in power-off" case by marking the VCPU as paused.
         */
-       if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+       if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
                vcpu->arch.pause = true;
+       else
+               vcpu->arch.pause = false;
 
        return 0;
 }
@@ -825,7 +841,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
        switch (action) {
        case CPU_STARTING:
        case CPU_STARTING_FROZEN:
-               cpu_init_hyp_mode(NULL);
+               if (__hyp_get_vectors() == hyp_default_vectors)
+                       cpu_init_hyp_mode(NULL);
                break;
        }
 
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index b23a59c1c522..2786eae10c0d 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.hcr = HCR_GUEST_MASK;
        return 0;
 }
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 70ed2c1f57b0..524b4b57f650 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -197,7 +197,8 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
        pgd = pgdp + pgd_index(addr);
        do {
                next = kvm_pgd_addr_end(addr, end);
-               unmap_puds(kvm, pgd, addr, next);
+               if (!pgd_none(*pgd))
+                       unmap_puds(kvm, pgd, addr, next);
        } while (pgd++, addr = next, addr != end);
 }
 
@@ -555,6 +556,71 @@ static void unmap_stage2_range(struct kvm *kvm, 
phys_addr_t start, u64 size)
        unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
+static void stage2_unmap_memslot(struct kvm *kvm,
+                                struct kvm_memory_slot *memslot)
+{
+       hva_t hva = memslot->userspace_addr;
+       phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+       phys_addr_t size = PAGE_SIZE * memslot->npages;
+       hva_t reg_end = hva + size;
+
+       /*
+        * A memory region could potentially cover multiple VMAs, and any holes
+        * between them, so iterate over all of them to find out if we should
+        * unmap any of them.
+        *
+        *     +--------------------------------------------+
+        * +---------------+----------------+   +----------------+
+        * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+        * +---------------+----------------+   +----------------+
+        *     |               memory region                |
+        *     +--------------------------------------------+
+        */
+       do {
+               struct vm_area_struct *vma = find_vma(current->mm, hva);
+               hva_t vm_start, vm_end;
+
+               if (!vma || vma->vm_start >= reg_end)
+                       break;
+
+               /*
+                * Take the intersection of this VMA with the memory region
+                */
+               vm_start = max(hva, vma->vm_start);
+               vm_end = min(reg_end, vma->vm_end);
+
+               if (!(vma->vm_flags & VM_PFNMAP)) {
+                       gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+                       unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+               }
+               hva = vm_end;
+       } while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any reguler RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots)
+               stage2_unmap_memslot(kvm, memslot);
+
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:       The KVM struct pointer for the VM.
@@ -746,6 +812,19 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, 
phys_addr_t *ipap)
        return false;
 }
 
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+       if (kvm_vcpu_trap_is_iabt(vcpu))
+               return false;
+
+       return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+       return !pfn_valid(pfn);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          struct kvm_memory_slot *memslot,
                          unsigned long fault_status)
@@ -761,7 +840,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
        pfn_t pfn;
        pgprot_t mem_type = PAGE_S2;
 
-       write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+       write_fault = kvm_is_write_fault(vcpu);
        if (fault_status == FSC_PERM && !write_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
@@ -770,6 +849,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
        /* Let's check if we will get back a huge page backed by hugetlbfs */
        down_read(&current->mm->mmap_sem);
        vma = find_vma_intersection(current->mm, hva, hva + 1);
+       if (unlikely(!vma)) {
+               kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+               up_read(&current->mm->mmap_sem);
+               return -EFAULT;
+       }
+
        if (is_vm_hugetlb_page(vma)) {
                hugetlb = true;
                gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -810,7 +895,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
        if (is_error_pfn(pfn))
                return -EFAULT;
 
-       if (kvm_is_mmio_pfn(pfn))
+       if (kvm_is_device_pfn(pfn))
                mem_type = PAGE_S2_DEVICE;
 
        spin_lock(&kvm->mmu_lock);
@@ -836,7 +921,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
                }
                coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
                ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
-                                    mem_type == PAGE_S2_DEVICE);
+                       pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
        }
 
 
@@ -912,6 +997,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
        memslot = gfn_to_memslot(vcpu->kvm, gfn);
 
+       /* Userspace should not be able to register out-of-bounds IPAs */
+       VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+
        ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
        if (ret == 0)
                ret = 1;
@@ -1136,6 +1224,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
 {
+       /*
+        * Prevent userspace from creating a memory region outside of the IPA
+        * space addressable by the KVM guest IPA space.
+        */
+       if (memslot->base_gfn + memslot->npages >=
+           (KVM_PHYS_SIZE >> PAGE_SHIFT))
+               return -EFAULT;
+
        return 0;
 }
 
diff --git a/arch/arm/mach-dove/board-dt.c b/arch/arm/mach-dove/board-dt.c
index 49fa9abd09da..7a7a09a5d5ff 100644
--- a/arch/arm/mach-dove/board-dt.c
+++ b/arch/arm/mach-dove/board-dt.c
@@ -26,7 +26,7 @@ static void __init dove_dt_init(void)
 #ifdef CONFIG_CACHE_TAUROS2
        tauros2_init(0);
 #endif
-       BUG_ON(mvebu_mbus_dt_init());
+       BUG_ON(mvebu_mbus_dt_init(false));
        of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c
index 01a5765a8b26..b509556f6cfd 100644
--- a/arch/arm/mach-imx/clk-imx6q.c
+++ b/arch/arm/mach-imx/clk-imx6q.c
@@ -406,7 +406,7 @@ static void __init imx6q_clocks_init(struct device_node 
*ccm_node)
        clk[gpmi_io]      = imx_clk_gate2("gpmi_io",       "enfc",              
base + 0x78, 28);
        clk[gpmi_apb]     = imx_clk_gate2("gpmi_apb",      "usdhc3",            
base + 0x78, 30);
        clk[rom]          = imx_clk_gate2("rom",           "ahb",               
base + 0x7c, 0);
-       clk[sata]         = imx_clk_gate2("sata",          "ipg",               
base + 0x7c, 4);
+       clk[sata]         = imx_clk_gate2("sata",          "ahb",               
base + 0x7c, 4);
        clk[sdma]         = imx_clk_gate2("sdma",          "ahb",               
base + 0x7c, 6);
        clk[spba]         = imx_clk_gate2("spba",          "ipg",               
base + 0x7c, 12);
        clk[spdif]        = imx_clk_gate2("spdif",         "spdif_podf",        
base + 0x7c, 14);
diff --git a/arch/arm/mach-kirkwood/board-dt.c 
b/arch/arm/mach-kirkwood/board-dt.c
index 78188159484d..79e629da1c92 100644
--- a/arch/arm/mach-kirkwood/board-dt.c
+++ b/arch/arm/mach-kirkwood/board-dt.c
@@ -116,7 +116,7 @@ static void __init kirkwood_dt_init(void)
         */
        writel(readl(CPU_CONFIG) & ~CPU_CONFIG_ERROR_PROP, CPU_CONFIG);
 
-       BUG_ON(mvebu_mbus_dt_init());
+       BUG_ON(mvebu_mbus_dt_init(false));
 
        kirkwood_l2_init();
 
diff --git a/arch/arm/mach-mvebu/armada-370-xp.c 
b/arch/arm/mach-mvebu/armada-370-xp.c
index f6c9d1d85c14..79c3766a56fd 100644
--- a/arch/arm/mach-mvebu/armada-370-xp.c
+++ b/arch/arm/mach-mvebu/armada-370-xp.c
@@ -41,7 +41,7 @@ static void __init armada_370_xp_timer_and_clk_init(void)
        of_clk_init(NULL);
        clocksource_of_init();
        coherency_init();
-       BUG_ON(mvebu_mbus_dt_init());
+       BUG_ON(mvebu_mbus_dt_init(coherency_available()));
 #ifdef CONFIG_CACHE_L2X0
        l2x0_of_init(0, ~0UL);
 #endif
diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c
index c295c10f9217..49bad4d66fa2 100644
--- a/arch/arm/mach-mvebu/coherency.c
+++ b/arch/arm/mach-mvebu/coherency.c
@@ -121,6 +121,20 @@ static struct notifier_block mvebu_hwcc_platform_nb = {
        .notifier_call = mvebu_hwcc_platform_notifier,
 };
 
+/*
+ * Keep track of whether we have IO hardware coherency enabled or not.
+ * On Armada 370's we will not be using it for example. We need to make
+ * that available [through coherency_available()] so the mbus controller
+ * doesn't enable the IO coherency bit in the attribute bits of the
+ * chip selects.
+ */
+static int coherency_enabled;
+
+int coherency_available(void)
+{
+       return coherency_enabled;
+}
+
 int __init coherency_init(void)
 {
        struct device_node *np;
@@ -164,6 +178,7 @@ int __init coherency_init(void)
                coherency_base = of_iomap(np, 0);
                coherency_cpu_base = of_iomap(np, 1);
                set_cpu_coherent(cpu_logical_map(smp_processor_id()), 0);
+               coherency_enabled = 1;
                of_node_put(np);
        }
 
diff --git a/arch/arm/mach-mvebu/coherency.h b/arch/arm/mach-mvebu/coherency.h
index 760226c41353..63e18c64a8e3 100644
--- a/arch/arm/mach-mvebu/coherency.h
+++ b/arch/arm/mach-mvebu/coherency.h
@@ -17,6 +17,7 @@
 extern unsigned long coherency_phys_base;
 
 int set_cpu_coherent(unsigned int cpu_id, int smp_group_id);
+int coherency_available(void);
 int coherency_init(void);
 
 #endif /* __MACH_370_XP_COHERENCY_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 00fbaa75dc7b..ea68925a4480 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -18,6 +18,7 @@
 #ifndef __ARM64_KVM_ARM_H__
 #define __ARM64_KVM_ARM_H__
 
+#include <asm/memory.h>
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
@@ -122,6 +123,17 @@
 #define VTCR_EL2_T0SZ_MASK     0x3f
 #define VTCR_EL2_T0SZ_40B      24
 
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
@@ -151,9 +163,9 @@
 #endif
 
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
-#define VTTBR_VMID_SHIFT  (48LLU)
-#define VTTBR_VMID_MASK          (0xffLLU << VTTBR_VMID_SHIFT)
+#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << 
VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  (UL(48))
+#define VTTBR_VMID_MASK          (UL(0xFF) << VTTBR_VMID_SHIFT)
 
 /* Hyp System Trap Register */
 #define HSTR_EL2_TTEE  (1 << 16)
@@ -176,13 +188,13 @@
 
 /* Exception Syndrome Register (ESR) bits */
 #define ESR_EL2_EC_SHIFT       (26)
-#define ESR_EL2_EC             (0x3fU << ESR_EL2_EC_SHIFT)
-#define ESR_EL2_IL             (1U << 25)
+#define ESR_EL2_EC             (UL(0x3f) << ESR_EL2_EC_SHIFT)
+#define ESR_EL2_IL             (UL(1) << 25)
 #define ESR_EL2_ISS            (ESR_EL2_IL - 1)
 #define ESR_EL2_ISV_SHIFT      (24)
-#define ESR_EL2_ISV            (1U << ESR_EL2_ISV_SHIFT)
+#define ESR_EL2_ISV            (UL(1) << ESR_EL2_ISV_SHIFT)
 #define ESR_EL2_SAS_SHIFT      (22)
-#define ESR_EL2_SAS            (3U << ESR_EL2_SAS_SHIFT)
+#define ESR_EL2_SAS            (UL(3) << ESR_EL2_SAS_SHIFT)
 #define ESR_EL2_SSE            (1 << 21)
 #define ESR_EL2_SRT_SHIFT      (16)
 #define ESR_EL2_SRT_MASK       (0x1f << ESR_EL2_SRT_SHIFT)
@@ -196,16 +208,16 @@
 #define ESR_EL2_FSC_TYPE       (0x3c)
 
 #define ESR_EL2_CV_SHIFT       (24)
-#define ESR_EL2_CV             (1U << ESR_EL2_CV_SHIFT)
+#define ESR_EL2_CV             (UL(1) << ESR_EL2_CV_SHIFT)
 #define ESR_EL2_COND_SHIFT     (20)
-#define ESR_EL2_COND           (0xfU << ESR_EL2_COND_SHIFT)
+#define ESR_EL2_COND           (UL(0xf) << ESR_EL2_COND_SHIFT)
 
 
 #define FSC_FAULT      (0x04)
 #define FSC_PERM       (0x0c)
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
-#define HPFAR_MASK     (~0xFUL)
+#define HPFAR_MASK     (~UL(0xf))
 
 #define ESR_EL2_EC_UNKNOWN     (0x00)
 #define ESR_EL2_EC_WFI         (0x01)
diff --git a/arch/arm64/include/asm/kvm_emulate.h 
b/arch/arm64/include/asm/kvm_emulate.h
index dd8ecfc3f995..681cb9080100 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+}
+
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
        return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 8e138c7c53ac..0d51874c838f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -59,10 +59,9 @@
 #define KERN_TO_HYP(kva)       ((unsigned long)kva - PAGE_OFFSET + 
HYP_PAGE_OFFSET)
 
 /*
- * Align KVM with the kernel's view of physical memory. Should be
- * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ * We currently only support a 40bit IPA.
  */
-#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT
+#define KVM_PHYS_SHIFT (40)
 #define KVM_PHYS_SIZE  (1UL << KVM_PHYS_SHIFT)
 #define KVM_PHYS_MASK  (KVM_PHYS_SIZE - 1UL)
 
@@ -75,6 +74,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -93,19 +93,6 @@ void kvm_clear_hyp_idmap(void);
 #define        kvm_set_pte(ptep, pte)          set_pte(ptep, pte)
 #define        kvm_set_pmd(pmdp, pmd)          set_pmd(pmdp, pmd)
 
-static inline bool kvm_is_write_fault(unsigned long esr)
-{
-       unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
-
-       if (esr_ec == ESR_EL2_EC_IABT)
-               return false;
-
-       if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
-               return false;
-
-       return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 08745578d54d..a8d81fa8c527 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
        return 0;
 }
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3974881388bb..b76159a153a5 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -54,8 +54,7 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, 
size_t size,
 
                *dma_handle = phys_to_dma(dev, page_to_phys(page));
                addr = page_address(page);
-               if (flags & __GFP_ZERO)
-                       memset(addr, 0, size);
+               memset(addr, 0, size);
                return addr;
        } else {
                return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2f645c90e4d8..5dab54accc56 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -160,7 +160,7 @@ config SBUS
 
 config NEED_DMA_MAP_STATE
        def_bool y
-       depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
+       depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB
 
 config NEED_SG_DMA_LENGTH
        def_bool y
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c 
b/arch/x86/kernel/cpu/microcode/intel_early.c
index 18f739129e72..43a07bf48dea 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -321,7 +321,7 @@ get_matching_model_microcode(int cpu, unsigned long start,
        unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
        int i;
 
-       while (leftover) {
+       while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
                mc_header = (struct microcode_header_intel *)ucode_ptr;
 
                mc_size = get_totalsize(mc_header);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index a1f5b1866cbe..490fee15fea5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -326,13 +326,16 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src)
 {
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];
+       int length;
 
        kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, 
(unsigned long)src));
        insn_get_length(&insn);
+       length = insn.length;
+
        /* Another subsystem puts a breakpoint, failed to recover */
        if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
                return 0;
-       memcpy(dest, insn.kaddr, insn.length);
+       memcpy(dest, insn.kaddr, length);
 
 #ifdef CONFIG_X86_64
        if (insn_rip_relative(&insn)) {
@@ -362,7 +365,7 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src)
                *(s32 *) disp = (s32) newdisp;
        }
 #endif
-       return insn.length;
+       return length;
 }
 
 static int __kprobes arch_copy_kprobe(struct kprobe *p)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9643eda60a52..074633411ea8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -495,8 +495,10 @@ static void skip_emulated_instruction(struct kvm_vcpu 
*vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       if (svm->vmcb->control.next_rip != 0)
+       if (svm->vmcb->control.next_rip != 0) {
+               WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
                svm->next_rip = svm->vmcb->control.next_rip;
+       }
 
        if (!svm->next_rip) {
                if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
@@ -4246,7 +4248,9 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
                break;
        }
 
-       vmcb->control.next_rip  = info->next_rip;
+       /* TODO: Advertise NRIPS to guest hypervisor unconditionally */
+       if (static_cpu_has(X86_FEATURE_NRIPS))
+               vmcb->control.next_rip  = info->next_rip;
        vmcb->control.exit_code = icpt_info.exit_code;
        vmexit = nested_svm_exit_handled(svm);
 
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index e990deed2d33..1aa0130a63d5 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -701,7 +701,6 @@ static int __init mvebu_mbus_common_init(struct 
mvebu_mbus_state *mbus,
                                         phys_addr_t sdramwins_phys_base,
                                         size_t sdramwins_size)
 {
-       struct device_node *np;
        int win;
 
        mbus->mbuswins_base = ioremap(mbuswins_phys_base, mbuswins_size);
@@ -714,12 +713,6 @@ static int __init mvebu_mbus_common_init(struct 
mvebu_mbus_state *mbus,
                return -ENOMEM;
        }
 
-       np = of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric");
-       if (np) {
-               mbus->hw_io_coherency = 1;
-               of_node_put(np);
-       }
-
        for (win = 0; win < mbus->soc->num_wins; win++)
                mvebu_mbus_disable_window(mbus, win);
 
@@ -889,7 +882,7 @@ static void __init mvebu_mbus_get_pcie_resources(struct 
device_node *np,
        }
 }
 
-int __init mvebu_mbus_dt_init(void)
+int __init mvebu_mbus_dt_init(bool is_coherent)
 {
        struct resource mbuswins_res, sdramwins_res;
        struct device_node *np, *controller;
@@ -928,6 +921,8 @@ int __init mvebu_mbus_dt_init(void)
                return -EINVAL;
        }
 
+       mbus_state.hw_io_coherency = is_coherent;
+
        /* Get optional pcie-{mem,io}-aperture properties */
        mvebu_mbus_get_pcie_resources(np, &mbus_state.pcie_mem_aperture,
                                          &mbus_state.pcie_io_aperture);
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index c611bcc01f7e..3e623ab5e315 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -765,7 +765,7 @@ static void get_memory_layout(const struct mem_ctl_info 
*mci)
        u32 reg;
        u64 limit, prv = 0;
        u64 tmp_mb;
-       u32 mb, kb;
+       u32 gb, mb;
        u32 rir_way;
 
        /*
@@ -775,15 +775,17 @@ static void get_memory_layout(const struct mem_ctl_info 
*mci)
        pvt->tolm = pvt->info.get_tolm(pvt);
        tmp_mb = (1 + pvt->tolm) >> 20;
 
-       mb = div_u64_rem(tmp_mb, 1000, &kb);
-       edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm);
+       gb = div_u64_rem(tmp_mb, 1024, &mb);
+       edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n",
+               gb, (mb*1000)/1024, (u64)pvt->tolm);
 
        /* Address range is already 45:25 */
        pvt->tohm = pvt->info.get_tohm(pvt);
        tmp_mb = (1 + pvt->tohm) >> 20;
 
-       mb = div_u64_rem(tmp_mb, 1000, &kb);
-       edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);
+       gb = div_u64_rem(tmp_mb, 1024, &mb);
+       edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n",
+               gb, (mb*1000)/1024, (u64)pvt->tohm);
 
        /*
         * Step 2) Get SAD range and SAD Interleave list
@@ -805,11 +807,11 @@ static void get_memory_layout(const struct mem_ctl_info 
*mci)
                        break;
 
                tmp_mb = (limit + 1) >> 20;
-               mb = div_u64_rem(tmp_mb, 1000, &kb);
+               gb = div_u64_rem(tmp_mb, 1024, &mb);
                edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: 
%s reg=0x%08x\n",
                         n_sads,
                         get_dram_attr(reg),
-                        mb, kb,
+                        gb, (mb*1000)/1024,
                         ((u64)tmp_mb) << 20L,
                         INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]",
                         reg);
@@ -840,9 +842,9 @@ static void get_memory_layout(const struct mem_ctl_info 
*mci)
                        break;
                tmp_mb = (limit + 1) >> 20;
 
-               mb = div_u64_rem(tmp_mb, 1000, &kb);
+               gb = div_u64_rem(tmp_mb, 1024, &mb);
                edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket 
interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
-                        n_tads, mb, kb,
+                        n_tads, gb, (mb*1000)/1024,
                         ((u64)tmp_mb) << 20L,
                         (u32)TAD_SOCK(reg),
                         (u32)TAD_CH(reg),
@@ -865,10 +867,10 @@ static void get_memory_layout(const struct mem_ctl_info 
*mci)
                                              tad_ch_nilv_offset[j],
                                              &reg);
                        tmp_mb = TAD_OFFSET(reg) >> 20;
-                       mb = div_u64_rem(tmp_mb, 1000, &kb);
+                       gb = div_u64_rem(tmp_mb, 1024, &mb);
                        edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB 
(0x%016Lx), reg=0x%08x\n",
                                 i, j,
-                                mb, kb,
+                                gb, (mb*1000)/1024,
                                 ((u64)tmp_mb) << 20L,
                                 reg);
                }
@@ -890,10 +892,10 @@ static void get_memory_layout(const struct mem_ctl_info 
*mci)
 
                        tmp_mb = RIR_LIMIT(reg) >> 20;
                        rir_way = 1 << RIR_WAY(reg);
-                       mb = div_u64_rem(tmp_mb, 1000, &kb);
+                       gb = div_u64_rem(tmp_mb, 1024, &mb);
                        edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB 
(0x%016Lx), way: %d, reg=0x%08x\n",
                                 i, j,
-                                mb, kb,
+                                gb, (mb*1000)/1024,
                                 ((u64)tmp_mb) << 20L,
                                 rir_way,
                                 reg);
@@ -904,10 +906,10 @@ static void get_memory_layout(const struct mem_ctl_info 
*mci)
                                                      &reg);
                                tmp_mb = RIR_OFFSET(reg) << 6;
 
-                               mb = div_u64_rem(tmp_mb, 1000, &kb);
+                               gb = div_u64_rem(tmp_mb, 1024, &mb);
                                edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset 
%u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
                                         i, j, k,
-                                        mb, kb,
+                                        gb, (mb*1000)/1024,
                                         ((u64)tmp_mb) << 20L,
                                         (u32)RIR_RNK_TGT(reg),
                                         reg);
@@ -945,7 +947,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
        u8                      ch_way, sck_way, pkg, sad_ha = 0;
        u32                     tad_offset;
        u32                     rir_way;
-       u32                     mb, kb;
+       u32                     mb, gb;
        u64                     ch_addr, offset, limit = 0, prv = 0;
 
 
@@ -1183,10 +1185,10 @@ static int get_memory_error_data(struct mem_ctl_info 
*mci,
                        continue;
 
                limit = RIR_LIMIT(reg);
-               mb = div_u64_rem(limit >> 20, 1000, &kb);
+               gb = div_u64_rem(limit >> 20, 1024, &mb);
                edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
                         n_rir,
-                        mb, kb,
+                        gb, (mb*1000)/1024,
                         limit,
                         1 << RIR_WAY(reg));
                if  (ch_addr <= limit)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 019a04a31384..a467261b10b9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -810,8 +810,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct 
net_device *dev)
        tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
        tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
        if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-               tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
-                                                        
MLX4_WQE_CTRL_TCP_UDP_CSUM);
+               if (!skb->encapsulation)
+                       tx_desc->ctrl.srcrb_flags |= 
cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+                                                                
MLX4_WQE_CTRL_TCP_UDP_CSUM);
+               else
+                       tx_desc->ctrl.srcrb_flags |= 
cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM);
                ring->tx_csum++;
        }
 
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 528bff5ec91f..85d370e1ca79 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3984,10 +3984,6 @@ static int hpsa_kdump_hard_reset_controller(struct 
pci_dev *pdev)
 
        /* Save the PCI command register */
        pci_read_config_word(pdev, 4, &command_register);
-       /* Turn the board off.  This is so that later pci_restore_state()
-        * won't turn the board on before the rest of config space is ready.
-        */
-       pci_disable_device(pdev);
        pci_save_state(pdev);
 
        /* find the first memory BAR, so we can find the cfg table */
@@ -4035,11 +4031,6 @@ static int hpsa_kdump_hard_reset_controller(struct 
pci_dev *pdev)
                goto unmap_cfgtable;
 
        pci_restore_state(pdev);
-       rc = pci_enable_device(pdev);
-       if (rc) {
-               dev_warn(&pdev->dev, "failed to enable device.\n");
-               goto unmap_cfgtable;
-       }
        pci_write_config_word(pdev, 4, command_register);
 
        /* Some devices (notably the HP Smart Array 5i Controller)
@@ -4525,6 +4516,23 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
        if (!reset_devices)
                return 0;
 
+       /* kdump kernel is loading, we don't know in which state is
+        * the pci interface. The dev->enable_cnt is equal zero
+        * so we call enable+disable, wait a while and switch it on.
+        */
+       rc = pci_enable_device(pdev);
+       if (rc) {
+               dev_warn(&pdev->dev, "Failed to enable PCI device\n");
+               return -ENODEV;
+       }
+       pci_disable_device(pdev);
+       msleep(260);                    /* a randomly chosen number */
+       rc = pci_enable_device(pdev);
+       if (rc) {
+               dev_warn(&pdev->dev, "failed to enable device.\n");
+               return -ENODEV;
+       }
+       pci_set_master(pdev);
        /* Reset the controller with a PCI power-cycle or via doorbell */
        rc = hpsa_kdump_hard_reset_controller(pdev);
 
@@ -4533,10 +4541,11 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
         * "performant mode".  Or, it might be 640x, which can't reset
         * due to concerns about shared bbwc between 6402/6404 pair.
         */
-       if (rc == -ENOTSUPP)
-               return rc; /* just try to do the kdump anyhow. */
-       if (rc)
-               return -ENODEV;
+       if (rc) {
+               if (rc != -ENOTSUPP) /* just try to do the kdump anyhow. */
+                       rc = -ENODEV;
+               goto out_disable;
+       }
 
        /* Now try to get the controller to respond to a no-op */
        dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
@@ -4547,7 +4556,11 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev)
                        dev_warn(&pdev->dev, "no-op failed%s\n",
                                        (i < 11 ? "; re-trying" : ""));
        }
-       return 0;
+
+out_disable:
+
+       pci_disable_device(pdev);
+       return rc;
 }
 
 static int hpsa_allocate_cmd_pool(struct ctlr_info *h)
@@ -4690,6 +4703,7 @@ static void 
hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
                iounmap(h->transtable);
        if (h->cfgtable)
                iounmap(h->cfgtable);
+       pci_disable_device(h->pdev);
        pci_release_regions(h->pdev);
        kfree(h);
 }
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 93de3ba994e7..f8ffee4562d3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2963,7 +2963,7 @@ done:
         */
        if (!p->leave_spinning)
                btrfs_set_path_blocking(p);
-       if (ret < 0)
+       if (ret < 0 && !p->skip_release_on_error)
                btrfs_release_path(p);
        return ret;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d3511cc17091..3b39eb4cb309 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -608,6 +608,7 @@ struct btrfs_path {
        unsigned int skip_locking:1;
        unsigned int leave_spinning:1;
        unsigned int search_commit_root:1;
+       unsigned int skip_release_on_error:1;
 };
 
 /*
@@ -3609,6 +3610,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct 
btrfs_trans_handle *trans,
 int verify_dir_item(struct btrfs_root *root,
                    struct extent_buffer *leaf,
                    struct btrfs_dir_item *dir_item);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+                                                struct btrfs_path *path,
+                                                const char *name,
+                                                int name_len);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index a0691df5dcea..9521a93b5303 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,10 +21,6 @@
 #include "hash.h"
 #include "transaction.h"
 
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root 
*root,
-                             struct btrfs_path *path,
-                             const char *name, int name_len);
-
 /*
  * insert a name into a directory, doing overflow properly if there is a hash
  * collision.  data_size indicates how big the item inserted should be.  On
@@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct 
btrfs_trans_handle *trans,
  * this walks through all the entries in a dir item and finds one
  * for a specific name.
  */
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root 
*root,
-                             struct btrfs_path *path,
-                             const char *name, int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+                                                struct btrfs_path *path,
+                                                const char *name, int name_len)
 {
        struct btrfs_dir_item *dir_item;
        unsigned long name_ptr;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 488e987c3374..618e86ceede7 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -29,6 +29,7 @@
 #include "xattr.h"
 #include "disk-io.h"
 #include "props.h"
+#include "locking.h"
 
 
 ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -91,7 +92,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                       struct inode *inode, const char *name,
                       const void *value, size_t size, int flags)
 {
-       struct btrfs_dir_item *di;
+       struct btrfs_dir_item *di = NULL;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_path *path;
        size_t name_len = strlen(name);
@@ -103,84 +104,119 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
+       path->skip_release_on_error = 1;
+
+       if (!value) {
+               di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+                                       name, name_len, -1);
+               if (!di && (flags & XATTR_REPLACE))
+                       ret = -ENODATA;
+               else if (di)
+                       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+               goto out;
+       }
 
+       /*
+        * For a replace we can't just do the insert blindly.
+        * Do a lookup first (read-only btrfs_search_slot), and return if xattr
+        * doesn't exist. If it exists, fall down below to the insert/replace
+        * path - we can't race with a concurrent xattr delete, because the VFS
+        * locks the inode's i_mutex before calling setxattr or removexattr.
+        */
        if (flags & XATTR_REPLACE) {
-               di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), 
name,
-                                       name_len, -1);
-               if (IS_ERR(di)) {
-                       ret = PTR_ERR(di);
-                       goto out;
-               } else if (!di) {
+               ASSERT(mutex_is_locked(&inode->i_mutex));
+               di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
+                                       name, name_len, 0);
+               if (!di) {
                        ret = -ENODATA;
                        goto out;
                }
-               ret = btrfs_delete_one_dir_name(trans, root, path, di);
-               if (ret)
-                       goto out;
                btrfs_release_path(path);
+               di = NULL;
+       }
 
+       ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+                                     name, name_len, value, size);
+       if (ret == -EOVERFLOW) {
                /*
-                * remove the attribute
+                * We have an existing item in a leaf, split_leaf couldn't
+                * expand it. That item might have or not a dir_item that
+                * matches our target xattr, so lets check.
                 */
-               if (!value)
-                       goto out;
-       } else {
-               di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
-                                       name, name_len, 0);
-               if (IS_ERR(di)) {
-                       ret = PTR_ERR(di);
+               ret = 0;
+               btrfs_assert_tree_locked(path->nodes[0]);
+               di = btrfs_match_dir_item_name(root, path, name, name_len);
+               if (!di && !(flags & XATTR_REPLACE)) {
+                       ret = -ENOSPC;
                        goto out;
                }
-               if (!di && !value)
-                       goto out;
-               btrfs_release_path(path);
+       } else if (ret == -EEXIST) {
+               ret = 0;
+               di = btrfs_match_dir_item_name(root, path, name, name_len);
+               ASSERT(di); /* logic error */
+       } else if (ret) {
+               goto out;
        }
 
-again:
-       ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-                                     name, name_len, value, size);
-       /*
-        * If we're setting an xattr to a new value but the new value is say
-        * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
-        * back from split_leaf.  This is because it thinks we'll be extending
-        * the existing item size, but we're asking for enough space to add the
-        * item itself.  So if we get EOVERFLOW just set ret to EEXIST and let
-        * the rest of the function figure it out.
-        */
-       if (ret == -EOVERFLOW)
+       if (di && (flags & XATTR_CREATE)) {
                ret = -EEXIST;
+               goto out;
+       }
 
-       if (ret == -EEXIST) {
-               if (flags & XATTR_CREATE)
-                       goto out;
+       if (di) {
                /*
-                * We can't use the path we already have since we won't have the
-                * proper locking for a delete, so release the path and
-                * re-lookup to delete the thing.
+                * We're doing a replace, and it must be atomic, that is, at
+                * any point in time we have either the old or the new xattr
+                * value in the tree. We don't want readers (getxattr and
+                * listxattrs) to miss a value, this is specially important
+                * for ACLs.
                 */
-               btrfs_release_path(path);
-               di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
-                                       name, name_len, -1);
-               if (IS_ERR(di)) {
-                       ret = PTR_ERR(di);
-                       goto out;
-               } else if (!di) {
-                       /* Shouldn't happen but just in case... */
-                       btrfs_release_path(path);
-                       goto again;
+               const int slot = path->slots[0];
+               struct extent_buffer *leaf = path->nodes[0];
+               const u16 old_data_len = btrfs_dir_data_len(leaf, di);
+               const u32 item_size = btrfs_item_size_nr(leaf, slot);
+               const u32 data_size = sizeof(*di) + name_len + size;
+               struct btrfs_item *item;
+               unsigned long data_ptr;
+               char *ptr;
+
+               if (size > old_data_len) {
+                       if (btrfs_leaf_free_space(root, leaf) <
+                           (size - old_data_len)) {
+                               ret = -ENOSPC;
+                               goto out;
+                       }
                }
 
-               ret = btrfs_delete_one_dir_name(trans, root, path, di);
-               if (ret)
-                       goto out;
+               if (old_data_len + name_len + sizeof(*di) == item_size) {
+                       /* No other xattrs packed in the same leaf item. */
+                       if (size > old_data_len)
+                               btrfs_extend_item(root, path,
+                                                 size - old_data_len);
+                       else if (size < old_data_len)
+                               btrfs_truncate_item(root, path, data_size, 1);
+               } else {
+                       /* There are other xattrs packed in the same item. */
+                       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+                       if (ret)
+                               goto out;
+                       btrfs_extend_item(root, path, data_size);
+               }
 
+               item = btrfs_item_nr(slot);
+               ptr = btrfs_item_ptr(leaf, slot, char);
+               ptr += btrfs_item_size(leaf, item) - data_size;
+               di = (struct btrfs_dir_item *)ptr;
+               btrfs_set_dir_data_len(leaf, di, size);
+               data_ptr = ((unsigned long)(di + 1)) + name_len;
+               write_extent_buffer(leaf, value, data_ptr, size);
+               btrfs_mark_buffer_dirty(leaf);
+       } else {
                /*
-                * We have a value to set, so go back and try to insert it now.
+                * Insert, and we had space for the xattr, so path->slots[0] is
+                * where our xattr dir_item is and btrfs_insert_xattr_item()
+                * filled it.
                 */
-               if (value) {
-                       btrfs_release_path(path);
-                       goto again;
-               }
        }
 out:
        btrfs_free_path(path);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7fe30f655aa5..35f54bc96519 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2478,9 +2478,7 @@ static ssize_t ocfs2_file_splice_write(struct 
pipe_inode_info *pipe,
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
        struct splice_desc sd = {
-               .total_len = len,
                .flags = flags,
-               .pos = *ppos,
                .u.file = out,
        };
 
@@ -2490,6 +2488,12 @@ static ssize_t ocfs2_file_splice_write(struct 
pipe_inode_info *pipe,
                        out->f_path.dentry->d_name.len,
                        out->f_path.dentry->d_name.name, len);
 
+       ret = generic_write_checks(out, ppos, &len, 0);
+       if (ret)
+               return ret;
+       sd.total_len = len;
+       sd.pos = *ppos;
+
        pipe_lock(pipe);
 
        splice_from_pipe_begin(&sd);
diff --git a/fs/splice.c b/fs/splice.c
index 12028fa41def..f345d53f94da 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1012,13 +1012,17 @@ generic_file_splice_write(struct pipe_inode_info *pipe, 
struct file *out,
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
        struct splice_desc sd = {
-               .total_len = len,
                .flags = flags,
-               .pos = *ppos,
                .u.file = out,
        };
        ssize_t ret;
 
+       ret = generic_write_checks(out, ppos, &len, S_ISBLK(inode->i_mode));
+       if (ret)
+               return ret;
+       sd.total_len = len;
+       sd.pos = *ppos;
+
        pipe_lock(pipe);
 
        splice_from_pipe_begin(&sd);
diff --git a/include/linux/mbus.h b/include/linux/mbus.h
index 345b8c53b897..550c88fb0267 100644
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -73,6 +73,6 @@ int mvebu_mbus_del_window(phys_addr_t base, size_t size);
 int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base,
                    size_t mbus_size, phys_addr_t sdram_phys_base,
                    size_t sdram_size);
-int mvebu_mbus_dt_init(void);
+int mvebu_mbus_dt_init(bool is_coherent);
 
 #endif /* __LINUX_MBUS_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c68e5e0628df..99de2409f731 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -855,7 +855,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct 
sk_buff *skb,
 
        if (nla[NFTA_CHAIN_POLICY]) {
                if ((chain != NULL &&
-                   !(chain->flags & NFT_BASE_CHAIN)) ||
+                   !(chain->flags & NFT_BASE_CHAIN)))
+                       return -EOPNOTSUPP;
+
+               if (chain == NULL &&
                    nla[NFTA_CHAIN_HOOK] == NULL)
                        return -EOPNOTSUPP;
 
diff --git a/net/netfilter/nfnetlink_cthelper.c 
b/net/netfilter/nfnetlink_cthelper.c
index 9e287cb56a04..54330fb5efaf 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
        if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
                return -EINVAL;
 
+       /* Not all fields are initialized so first zero the tuple */
+       memset(tuple, 0, sizeof(struct nf_conntrack_tuple));
+
        tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
        tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
 
@@ -86,7 +89,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
 static int
 nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
 {
-       const struct nf_conn_help *help = nfct_help(ct);
+       struct nf_conn_help *help = nfct_help(ct);
 
        if (attr == NULL)
                return -EINVAL;
@@ -94,7 +97,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn 
*ct)
        if (help->helper->data_len == 0)
                return -EINVAL;
 
-       memcpy(&help->data, nla_data(attr), help->helper->data_len);
+       memcpy(help->data, nla_data(attr), help->helper->data_len);
        return 0;
 }
 
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7350723aeb15..969589590814 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -82,6 +82,9 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
                entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
                break;
        case AF_INET6:
+               if (proto)
+                       entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
                entry->e6.ipv6.proto = proto;
                entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
                break;
@@ -313,6 +316,9 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const 
struct nft_ctx *ctx,
                entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
                break;
        case AF_INET6:
+               if (proto)
+                       entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
                entry->e6.ipv6.proto = proto;
                entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
                break;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 1316e558db64..c324a52bb407 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -674,7 +674,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu 
*vcpu,
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        int sgi;
-       int min_sgi = (offset & ~0x3) * 4;
+       int min_sgi = (offset & ~0x3);
        int max_sgi = min_sgi + 3;
        int vcpu_id = vcpu->vcpu_id;
        u32 reg = 0;
@@ -695,7 +695,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu 
*vcpu,
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        int sgi;
-       int min_sgi = (offset & ~0x3) * 4;
+       int min_sgi = (offset & ~0x3);
        int max_sgi = min_sgi + 3;
        int vcpu_id = vcpu->vcpu_id;
        u32 reg;
@@ -1387,7 +1387,8 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
                        bool level)
 {
-       if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+       if (likely(vgic_initialized(kvm)) &&
+           vgic_update_irq_state(kvm, cpuid, irq_num, level))
                vgic_kick_vcpus(kvm);
 
        return 0;
@@ -1610,7 +1611,7 @@ out:
 
 int kvm_vgic_create(struct kvm *kvm)
 {
-       int i, vcpu_lock_idx = -1, ret = 0;
+       int i, vcpu_lock_idx = -1, ret;
        struct kvm_vcpu *vcpu;
 
        mutex_lock(&kvm->lock);
@@ -1625,6 +1626,7 @@ int kvm_vgic_create(struct kvm *kvm)
         * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
         * that no other VCPUs are run while we create the vgic.
         */
+       ret = -EBUSY;
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (!mutex_trylock(&vcpu->mutex))
                        goto out_unlock;
@@ -1632,11 +1634,10 @@ int kvm_vgic_create(struct kvm *kvm)
        }
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
-               if (vcpu->arch.has_run_once) {
-                       ret = -EBUSY;
+               if (vcpu->arch.has_run_once)
                        goto out_unlock;
-               }
        }
+       ret = 0;
 
        spin_lock_init(&kvm->arch.vgic.lock);
        kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Re: Linux 3.14.47

Reply via email to