commit:     b9b0a047e12066039346071e8ee8efa10ce0984c
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Tue Dec  5 11:39:39 2017 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Tue Dec  5 11:39:39 2017 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=b9b0a047

Linux patch 4.4.104

 0000_README              |    4 +
 1103_linux-4.4.104.patch | 1469 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1473 insertions(+)

diff --git a/0000_README b/0000_README
index 21ecaf0..a31f5b0 100644
--- a/0000_README
+++ b/0000_README
@@ -455,6 +455,10 @@ Patch:  1102_linux-4.4.103.patch
 From:   http://www.kernel.org
 Desc:   Linux 4.4.103
 
+Patch:  1103_linux-4.4.104.patch
+From:   http://www.kernel.org
+Desc:   Linux 4.4.104
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1103_linux-4.4.104.patch b/1103_linux-4.4.104.patch
new file mode 100644
index 0000000..49bb49f
--- /dev/null
+++ b/1103_linux-4.4.104.patch
@@ -0,0 +1,1469 @@
+diff --git a/Makefile b/Makefile
+index f5a51cd7ca49..55500e023f61 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,6 +1,6 @@
+ VERSION = 4
+ PATCHLEVEL = 4
+-SUBLEVEL = 103
++SUBLEVEL = 104
+ EXTRAVERSION =
+ NAME = Blurry Fish Butt
+ 
+diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+index 5b0430041ec6..fec92cd36ae3 100644
+--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
++++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+@@ -88,7 +88,7 @@
+       interrupts-extended = <&intc 83 &omap3_pmx_core 0x11a>;
+       pinctrl-names = "default";
+       pinctrl-0 = <&mmc1_pins &mmc1_cd>;
+-      cd-gpios = <&gpio4 31 IRQ_TYPE_LEVEL_LOW>;              /* gpio127 */
++      cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>;         /* gpio127 */
+       vmmc-supply = <&vmmc1>;
+       bus-width = <4>;
+       cap-power-off-card;
+diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
+index 0010c78c4998..8fd9e637629a 100644
+--- a/arch/x86/include/asm/efi.h
++++ b/arch/x86/include/asm/efi.h
+@@ -3,6 +3,7 @@
+ 
+ #include <asm/fpu/api.h>
+ #include <asm/pgtable.h>
++#include <asm/tlb.h>
+ 
+ /*
+  * We map the EFI regions needed for runtime services non-contiguously,
+@@ -64,6 +65,17 @@ extern u64 asmlinkage efi_call(void *fp, ...);
+ 
+ #define efi_call_phys(f, args...)             efi_call((f), args)
+ 
++/*
++ * Scratch space used for switching the pagetable in the EFI stub
++ */
++struct efi_scratch {
++      u64     r15;
++      u64     prev_cr3;
++      pgd_t   *efi_pgt;
++      bool    use_pgd;
++      u64     phys_stack;
++} __packed;
++
+ #define efi_call_virt(f, ...)                                         \
+ ({                                                                    \
+       efi_status_t __s;                                               \
+@@ -71,7 +83,20 @@ extern u64 asmlinkage efi_call(void *fp, ...);
+       efi_sync_low_kernel_mappings();                                 \
+       preempt_disable();                                              \
+       __kernel_fpu_begin();                                           \
++                                                                      \
++      if (efi_scratch.use_pgd) {                                      \
++              efi_scratch.prev_cr3 = read_cr3();                      \
++              write_cr3((unsigned long)efi_scratch.efi_pgt);          \
++              __flush_tlb_all();                                      \
++      }                                                               \
++                                                                      \
+       __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__);    \
++                                                                      \
++      if (efi_scratch.use_pgd) {                                      \
++              write_cr3(efi_scratch.prev_cr3);                        \
++              __flush_tlb_all();                                      \
++      }                                                               \
++                                                                      \
+       __kernel_fpu_end();                                             \
+       preempt_enable();                                               \
+       __s;                                                            \
+@@ -111,6 +136,7 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size);
+ extern void __init efi_map_region(efi_memory_desc_t *md);
+ extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
+ extern void efi_sync_low_kernel_mappings(void);
++extern int __init efi_alloc_page_tables(void);
+ extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+ extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+ extern void __init old_map_region(efi_memory_desc_t *md);
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 4e1b254c3695..4b1152e57340 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1696,6 +1696,8 @@ static int ud_interception(struct vcpu_svm *svm)
+       int er;
+ 
+       er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
++      if (er == EMULATE_USER_EXIT)
++              return 0;
+       if (er != EMULATE_DONE)
+               kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+       return 1;
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 67ba0d8f87c7..253a8c8207bb 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -5267,6 +5267,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
+                       return 1;
+               }
+               er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
++              if (er == EMULATE_USER_EXIT)
++                      return 0;
+               if (er != EMULATE_DONE)
+                       kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 3ffd5900da5b..df81717a92f3 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1812,6 +1812,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
+        */
+       BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+ 
++      if (guest_hv_clock.version & 1)
++              ++guest_hv_clock.version;  /* first time write, random junk */
++
+       vcpu->hv_clock.version = guest_hv_clock.version + 1;
+       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                               &vcpu->hv_clock,
+@@ -5426,6 +5429,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
+                       if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+                                               emulation_type))
+                               return EMULATE_DONE;
++                      if (ctxt->have_exception && inject_emulated_exception(vcpu))
++                              return EMULATE_DONE;
+                       if (emulation_type & EMULTYPE_SKIP)
+                               return EMULATE_FAIL;
+                       return handle_emulation_failure(vcpu);
+diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
+index b599a780a5a9..a0fe62e3f4a3 100644
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -911,15 +911,10 @@ static void populate_pte(struct cpa_data *cpa,
+       pte = pte_offset_kernel(pmd, start);
+ 
+       while (num_pages-- && start < end) {
+-
+-              /* deal with the NX bit */
+-              if (!(pgprot_val(pgprot) & _PAGE_NX))
+-                      cpa->pfn &= ~_PAGE_NX;
+-
+-              set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot));
++              set_pte(pte, pfn_pte(cpa->pfn, pgprot));
+ 
+               start    += PAGE_SIZE;
+-              cpa->pfn += PAGE_SIZE;
++              cpa->pfn++;
+               pte++;
+       }
+ }
+@@ -975,11 +970,11 @@ static int populate_pmd(struct cpa_data *cpa,
+ 
+               pmd = pmd_offset(pud, start);
+ 
+-              set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE |
++              set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
+                                  massage_pgprot(pmd_pgprot)));
+ 
+               start     += PMD_SIZE;
+-              cpa->pfn  += PMD_SIZE;
++              cpa->pfn  += PMD_SIZE >> PAGE_SHIFT;
+               cur_pages += PMD_SIZE >> PAGE_SHIFT;
+       }
+ 
+@@ -1048,11 +1043,11 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+        * Map everything starting from the Gb boundary, possibly with 1G pages
+        */
+       while (end - start >= PUD_SIZE) {
+-              set_pud(pud, __pud(cpa->pfn | _PAGE_PSE |
++              set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
+                                  massage_pgprot(pud_pgprot)));
+ 
+               start     += PUD_SIZE;
+-              cpa->pfn  += PUD_SIZE;
++              cpa->pfn  += PUD_SIZE >> PAGE_SHIFT;
+               cur_pages += PUD_SIZE >> PAGE_SHIFT;
+               pud++;
+       }
+diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
+index ea48449b2e63..64fbc7e33226 100644
+--- a/arch/x86/platform/efi/efi-bgrt.c
++++ b/arch/x86/platform/efi/efi-bgrt.c
+@@ -28,8 +28,7 @@ struct bmp_header {
+ void __init efi_bgrt_init(void)
+ {
+       acpi_status status;
+-      void __iomem *image;
+-      bool ioremapped = false;
++      void *image;
+       struct bmp_header bmp_header;
+ 
+       if (acpi_disabled)
+@@ -70,20 +69,14 @@ void __init efi_bgrt_init(void)
+               return;
+       }
+ 
+-      image = efi_lookup_mapped_addr(bgrt_tab->image_address);
++      image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB);
+       if (!image) {
+-              image = early_ioremap(bgrt_tab->image_address,
+-                                     sizeof(bmp_header));
+-              ioremapped = true;
+-              if (!image) {
+-                      pr_err("Ignoring BGRT: failed to map image header memory\n");
+-                      return;
+-              }
++              pr_err("Ignoring BGRT: failed to map image header memory\n");
++              return;
+       }
+ 
+-      memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
+-      if (ioremapped)
+-              early_iounmap(image, sizeof(bmp_header));
++      memcpy(&bmp_header, image, sizeof(bmp_header));
++      memunmap(image);
+       bgrt_image_size = bmp_header.size;
+ 
+       bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
+@@ -93,18 +86,14 @@ void __init efi_bgrt_init(void)
+               return;
+       }
+ 
+-      if (ioremapped) {
+-              image = early_ioremap(bgrt_tab->image_address,
+-                                     bmp_header.size);
+-              if (!image) {
+-                      pr_err("Ignoring BGRT: failed to map image memory\n");
+-                      kfree(bgrt_image);
+-                      bgrt_image = NULL;
+-                      return;
+-              }
++      image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB);
++      if (!image) {
++              pr_err("Ignoring BGRT: failed to map image memory\n");
++              kfree(bgrt_image);
++              bgrt_image = NULL;
++              return;
+       }
+ 
+-      memcpy_fromio(bgrt_image, image, bgrt_image_size);
+-      if (ioremapped)
+-              early_iounmap(image, bmp_header.size);
++      memcpy(bgrt_image, image, bgrt_image_size);
++      memunmap(image);
+ }
+diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
+index ad285404ea7f..3c1f3cd7b2ba 100644
+--- a/arch/x86/platform/efi/efi.c
++++ b/arch/x86/platform/efi/efi.c
+@@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void)
+  * This function will switch the EFI runtime services to virtual mode.
+  * Essentially, we look through the EFI memmap and map every region that
+  * has the runtime attribute bit set in its memory descriptor into the
+- * ->trampoline_pgd page table using a top-down VA allocation scheme.
++ * efi_pgd page table.
+  *
+  * The old method which used to update that memory descriptor with the
+  * virtual address obtained from ioremap() is still supported when the
+@@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void)
+  *
+  * The new method does a pagetable switch in a preemption-safe manner
+  * so that we're in a different address space when calling a runtime
+- * function. For function arguments passing we do copy the PGDs of the
+- * kernel page table into ->trampoline_pgd prior to each call.
++ * function. For function arguments passing we do copy the PUDs of the
++ * kernel page table into efi_pgd prior to each call.
+  *
+  * Specially for kexec boot, efi runtime maps in previous kernel should
+  * be passed in via setup_data. In that case runtime ranges will be mapped
+@@ -895,6 +895,12 @@ static void __init __efi_enter_virtual_mode(void)
+ 
+       efi.systab = NULL;
+ 
++      if (efi_alloc_page_tables()) {
++              pr_err("Failed to allocate EFI page tables\n");
++              clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
++              return;
++      }
++
+       efi_merge_regions();
+       new_memmap = efi_map_regions(&count, &pg_shift);
+       if (!new_memmap) {
+@@ -954,28 +960,11 @@ static void __init __efi_enter_virtual_mode(void)
+       efi_runtime_mkexec();
+ 
+       /*
+-       * We mapped the descriptor array into the EFI pagetable above but we're
+-       * not unmapping it here. Here's why:
+-       *
+-       * We're copying select PGDs from the kernel page table to the EFI page
+-       * table and when we do so and make changes to those PGDs like unmapping
+-       * stuff from them, those changes appear in the kernel page table and we
+-       * go boom.
+-       *
+-       * From setup_real_mode():
+-       *
+-       * ...
+-       * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
+-       *
+-       * In this particular case, our allocation is in PGD 0 of the EFI page
+-       * table but we've copied that PGD from PGD[272] of the EFI page table:
+-       *
+-       *      pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
+-       *
+-       * where the direct memory mapping in kernel space is.
+-       *
+-       * new_memmap's VA comes from that direct mapping and thus clearing it,
+-       * it would get cleared in the kernel page table too.
++       * We mapped the descriptor array into the EFI pagetable above
++       * but we're not unmapping it here because if we're running in
++       * EFI mixed mode we need all of memory to be accessible when
++       * we pass parameters to the EFI runtime services in the
++       * thunking code.
+        *
+        * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
+        */
+diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
+index ed5b67338294..58d669bc8250 100644
+--- a/arch/x86/platform/efi/efi_32.c
++++ b/arch/x86/platform/efi/efi_32.c
+@@ -38,6 +38,11 @@
+  * say 0 - 3G.
+  */
+ 
++int __init efi_alloc_page_tables(void)
++{
++      return 0;
++}
++
+ void efi_sync_low_kernel_mappings(void) {}
+ void __init efi_dump_pagetable(void) {}
+ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
+index a0ac0f9c307f..18dfaad71c99 100644
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -40,6 +40,7 @@
+ #include <asm/fixmap.h>
+ #include <asm/realmode.h>
+ #include <asm/time.h>
++#include <asm/pgalloc.h>
+ 
+ /*
+  * We allocate runtime services regions bottom-up, starting from -4G, i.e.
+@@ -47,16 +48,7 @@
+  */
+ static u64 efi_va = EFI_VA_START;
+ 
+-/*
+- * Scratch space used for switching the pagetable in the EFI stub
+- */
+-struct efi_scratch {
+-      u64 r15;
+-      u64 prev_cr3;
+-      pgd_t *efi_pgt;
+-      bool use_pgd;
+-      u64 phys_stack;
+-} __packed;
++struct efi_scratch efi_scratch;
+ 
+ static void __init early_code_mapping_set_exec(int executable)
+ {
+@@ -83,8 +75,11 @@ pgd_t * __init efi_call_phys_prolog(void)
+       int pgd;
+       int n_pgds;
+ 
+-      if (!efi_enabled(EFI_OLD_MEMMAP))
+-              return NULL;
++      if (!efi_enabled(EFI_OLD_MEMMAP)) {
++              save_pgd = (pgd_t *)read_cr3();
++              write_cr3((unsigned long)efi_scratch.efi_pgt);
++              goto out;
++      }
+ 
+       early_code_mapping_set_exec(1);
+ 
+@@ -96,6 +91,7 @@ pgd_t * __init efi_call_phys_prolog(void)
+               vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
+               set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+       }
++out:
+       __flush_tlb_all();
+ 
+       return save_pgd;
+@@ -109,8 +105,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
+       int pgd_idx;
+       int nr_pgds;
+ 
+-      if (!save_pgd)
++      if (!efi_enabled(EFI_OLD_MEMMAP)) {
++              write_cr3((unsigned long)save_pgd);
++              __flush_tlb_all();
+               return;
++      }
+ 
+       nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+ 
+@@ -123,27 +122,97 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
+       early_code_mapping_set_exec(0);
+ }
+ 
++static pgd_t *efi_pgd;
++
++/*
++ * We need our own copy of the higher levels of the page tables
++ * because we want to avoid inserting EFI region mappings (EFI_VA_END
++ * to EFI_VA_START) into the standard kernel page tables. Everything
++ * else can be shared, see efi_sync_low_kernel_mappings().
++ */
++int __init efi_alloc_page_tables(void)
++{
++      pgd_t *pgd;
++      pud_t *pud;
++      gfp_t gfp_mask;
++
++      if (efi_enabled(EFI_OLD_MEMMAP))
++              return 0;
++
++      gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO;
++      efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
++      if (!efi_pgd)
++              return -ENOMEM;
++
++      pgd = efi_pgd + pgd_index(EFI_VA_END);
++
++      pud = pud_alloc_one(NULL, 0);
++      if (!pud) {
++              free_page((unsigned long)efi_pgd);
++              return -ENOMEM;
++      }
++
++      pgd_populate(NULL, pgd, pud);
++
++      return 0;
++}
++
+ /*
+  * Add low kernel mappings for passing arguments to EFI functions.
+  */
+ void efi_sync_low_kernel_mappings(void)
+ {
+-      unsigned num_pgds;
+-      pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
++      unsigned num_entries;
++      pgd_t *pgd_k, *pgd_efi;
++      pud_t *pud_k, *pud_efi;
+ 
+       if (efi_enabled(EFI_OLD_MEMMAP))
+               return;
+ 
+-      num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
++      /*
++       * We can share all PGD entries apart from the one entry that
++       * covers the EFI runtime mapping space.
++       *
++       * Make sure the EFI runtime region mappings are guaranteed to
++       * only span a single PGD entry and that the entry also maps
++       * other important kernel regions.
++       */
++      BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
++      BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
++                      (EFI_VA_END & PGDIR_MASK));
++
++      pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
++      pgd_k = pgd_offset_k(PAGE_OFFSET);
+ 
+-      memcpy(pgd + pgd_index(PAGE_OFFSET),
+-              init_mm.pgd + pgd_index(PAGE_OFFSET),
+-              sizeof(pgd_t) * num_pgds);
++      num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
++      memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
++
++      /*
++       * We share all the PUD entries apart from those that map the
++       * EFI regions. Copy around them.
++       */
++      BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
++      BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
++
++      pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
++      pud_efi = pud_offset(pgd_efi, 0);
++
++      pgd_k = pgd_offset_k(EFI_VA_END);
++      pud_k = pud_offset(pgd_k, 0);
++
++      num_entries = pud_index(EFI_VA_END);
++      memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
++
++      pud_efi = pud_offset(pgd_efi, EFI_VA_START);
++      pud_k = pud_offset(pgd_k, EFI_VA_START);
++
++      num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
++      memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
+ }
+ 
+ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+ {
+-      unsigned long text;
++      unsigned long pfn, text;
+       struct page *page;
+       unsigned npages;
+       pgd_t *pgd;
+@@ -151,8 +220,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+       if (efi_enabled(EFI_OLD_MEMMAP))
+               return 0;
+ 
+-      efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
+-      pgd = __va(efi_scratch.efi_pgt);
++      efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
++      pgd = efi_pgd;
+ 
+       /*
+        * It can happen that the physical address of new_memmap lands in memory
+@@ -160,7 +229,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+        * and ident-map those pages containing the map before calling
+        * phys_efi_set_virtual_address_map().
+        */
+-      if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
++      pfn = pa_memmap >> PAGE_SHIFT;
++      if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) {
+               pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
+               return 1;
+       }
+@@ -185,8 +255,9 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+ 
+       npages = (_end - _text) >> PAGE_SHIFT;
+       text = __pa(_text);
++      pfn = text >> PAGE_SHIFT;
+ 
+-      if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) {
++      if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) {
+               pr_err("Failed to map kernel text 1:1\n");
+               return 1;
+       }
+@@ -196,20 +267,20 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+ 
+ void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+ {
+-      pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+-
+-      kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
++      kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages);
+ }
+ 
+ static void __init __map_region(efi_memory_desc_t *md, u64 va)
+ {
+-      pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+-      unsigned long pf = 0;
++      unsigned long flags = 0;
++      unsigned long pfn;
++      pgd_t *pgd = efi_pgd;
+ 
+       if (!(md->attribute & EFI_MEMORY_WB))
+-              pf |= _PAGE_PCD;
++              flags |= _PAGE_PCD;
+ 
+-      if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
++      pfn = md->phys_addr >> PAGE_SHIFT;
++      if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags))
+               pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
+                          md->phys_addr, va);
+ }
+@@ -312,9 +383,7 @@ void __init efi_runtime_mkexec(void)
+ void __init efi_dump_pagetable(void)
+ {
+ #ifdef CONFIG_EFI_PGT_DUMP
+-      pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+-
+-      ptdump_walk_pgd_level(NULL, pgd);
++      ptdump_walk_pgd_level(NULL, efi_pgd);
+ #endif
+ }
+ 
+diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
+index 86d0f9e08dd9..32020cb8bb08 100644
+--- a/arch/x86/platform/efi/efi_stub_64.S
++++ b/arch/x86/platform/efi/efi_stub_64.S
+@@ -38,41 +38,6 @@
+       mov %rsi, %cr0;                 \
+       mov (%rsp), %rsp
+ 
+-      /* stolen from gcc */
+-      .macro FLUSH_TLB_ALL
+-      movq %r15, efi_scratch(%rip)
+-      movq %r14, efi_scratch+8(%rip)
+-      movq %cr4, %r15
+-      movq %r15, %r14
+-      andb $0x7f, %r14b
+-      movq %r14, %cr4
+-      movq %r15, %cr4
+-      movq efi_scratch+8(%rip), %r14
+-      movq efi_scratch(%rip), %r15
+-      .endm
+-
+-      .macro SWITCH_PGT
+-      cmpb $0, efi_scratch+24(%rip)
+-      je 1f
+-      movq %r15, efi_scratch(%rip)            # r15
+-      # save previous CR3
+-      movq %cr3, %r15
+-      movq %r15, efi_scratch+8(%rip)          # prev_cr3
+-      movq efi_scratch+16(%rip), %r15         # EFI pgt
+-      movq %r15, %cr3
+-      1:
+-      .endm
+-
+-      .macro RESTORE_PGT
+-      cmpb $0, efi_scratch+24(%rip)
+-      je 2f
+-      movq efi_scratch+8(%rip), %r15
+-      movq %r15, %cr3
+-      movq efi_scratch(%rip), %r15
+-      FLUSH_TLB_ALL
+-      2:
+-      .endm
+-
+ ENTRY(efi_call)
+       SAVE_XMM
+       mov (%rsp), %rax
+@@ -83,16 +48,8 @@ ENTRY(efi_call)
+       mov %r8, %r9
+       mov %rcx, %r8
+       mov %rsi, %rcx
+-      SWITCH_PGT
+       call *%rdi
+-      RESTORE_PGT
+       addq $48, %rsp
+       RESTORE_XMM
+       ret
+ ENDPROC(efi_call)
+-
+-      .data
+-ENTRY(efi_scratch)
+-      .fill 3,8,0
+-      .byte 0
+-      .quad 0
+diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
+index 3b52677f459a..0cd8f039602e 100644
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -325,38 +325,6 @@ u64 __init efi_mem_desc_end(efi_memory_desc_t *md)
+       return end;
+ }
+ 
+-/*
+- * We can't ioremap data in EFI boot services RAM, because we've already mapped
+- * it as RAM.  So, look it up in the existing EFI memory map instead.  Only
+- * callable after efi_enter_virtual_mode and before efi_free_boot_services.
+- */
+-void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
+-{
+-      struct efi_memory_map *map;
+-      void *p;
+-      map = efi.memmap;
+-      if (!map)
+-              return NULL;
+-      if (WARN_ON(!map->map))
+-              return NULL;
+-      for (p = map->map; p < map->map_end; p += map->desc_size) {
+-              efi_memory_desc_t *md = p;
+-              u64 size = md->num_pages << EFI_PAGE_SHIFT;
+-              u64 end = md->phys_addr + size;
+-              if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+-                  md->type != EFI_BOOT_SERVICES_CODE &&
+-                  md->type != EFI_BOOT_SERVICES_DATA)
+-                      continue;
+-              if (!md->virt_addr)
+-                      continue;
+-              if (phys_addr >= md->phys_addr && phys_addr < end) {
+-                      phys_addr += md->virt_addr - md->phys_addr;
+-                      return (__force void __iomem *)(unsigned long)phys_addr;
+-              }
+-      }
+-      return NULL;
+-}
+-
+ static __initdata efi_config_table_type_t common_tables[] = {
+       {ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
+       {ACPI_TABLE_GUID, "ACPI", &efi.acpi},
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+index f4cae5357e40..3e90ddcbb24a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+@@ -1575,34 +1575,32 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
+               WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]);
+ }
+ 
+-/* Atom needs data in little endian format
+- * so swap as appropriate when copying data to
+- * or from atom. Note that atom operates on
+- * dw units.
++/* Atom needs data in little endian format so swap as appropriate when copying
++ * data to or from atom. Note that atom operates on dw units.
++ *
++ * Use to_le=true when sending data to atom and provide at least
++ * ALIGN(num_bytes,4) bytes in the dst buffer.
++ *
++ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
++ * bytes in the src buffer.
+  */
+ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
+ {
+ #ifdef __BIG_ENDIAN
+-      u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
+-      u32 *dst32, *src32;
++      u32 src_tmp[5], dst_tmp[5];
+       int i;
++      u8 align_num_bytes = ALIGN(num_bytes, 4);
+ 
+-      memcpy(src_tmp, src, num_bytes);
+-      src32 = (u32 *)src_tmp;
+-      dst32 = (u32 *)dst_tmp;
+       if (to_le) {
+-              for (i = 0; i < ((num_bytes + 3) / 4); i++)
+-                      dst32[i] = cpu_to_le32(src32[i]);
+-              memcpy(dst, dst_tmp, num_bytes);
++              memcpy(src_tmp, src, num_bytes);
++              for (i = 0; i < align_num_bytes / 4; i++)
++                      dst_tmp[i] = cpu_to_le32(src_tmp[i]);
++              memcpy(dst, dst_tmp, align_num_bytes);
+       } else {
+-              u8 dws = num_bytes & ~3;
+-              for (i = 0; i < ((num_bytes + 3) / 4); i++)
+-                      dst32[i] = le32_to_cpu(src32[i]);
+-              memcpy(dst, dst_tmp, dws);
+-              if (num_bytes % 4) {
+-                      for (i = 0; i < (num_bytes % 4); i++)
+-                              dst[dws+i] = dst_tmp[dws+i];
+-              }
++              memcpy(src_tmp, src, align_num_bytes);
++              for (i = 0; i < align_num_bytes / 4; i++)
++                      dst_tmp[i] = le32_to_cpu(src_tmp[i]);
++              memcpy(dst, dst_tmp, num_bytes);
+       }
+ #else
+       memcpy(dst, src, num_bytes);
+diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
+index f3bee54c414f..cb4313c68f71 100644
+--- a/drivers/gpu/drm/i915/intel_i2c.c
++++ b/drivers/gpu/drm/i915/intel_i2c.c
+@@ -440,7 +440,9 @@ static bool
+ gmbus_is_index_read(struct i2c_msg *msgs, int i, int num)
+ {
+       return (i + 1 < num &&
+-              !(msgs[i].flags & I2C_M_RD) && msgs[i].len <= 2 &&
++              msgs[i].addr == msgs[i + 1].addr &&
++              !(msgs[i].flags & I2C_M_RD) &&
++              (msgs[i].len == 1 || msgs[i].len == 2) &&
+               (msgs[i + 1].flags & I2C_M_RD));
+ }
+ 
+diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
+index f97b73ec4713..f418c002d323 100644
+--- a/drivers/gpu/drm/panel/panel-simple.c
++++ b/drivers/gpu/drm/panel/panel-simple.c
+@@ -352,6 +352,7 @@ static int panel_simple_remove(struct device *dev)
+       drm_panel_remove(&panel->base);
+ 
+       panel_simple_disable(&panel->base);
++      panel_simple_unprepare(&panel->base);
+ 
+       if (panel->ddc)
+               put_device(&panel->ddc->dev);
+@@ -367,6 +368,7 @@ static void panel_simple_shutdown(struct device *dev)
+       struct panel_simple *panel = dev_get_drvdata(dev);
+ 
+       panel_simple_disable(&panel->base);
++      panel_simple_unprepare(&panel->base);
+ }
+ 
+ static const struct drm_display_mode ampire_am800480r3tmqwa1h_mode = {
+diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
+index b5760851195c..0c6216a6ee9e 100644
+--- a/drivers/gpu/drm/radeon/atombios_dp.c
++++ b/drivers/gpu/drm/radeon/atombios_dp.c
+@@ -45,34 +45,32 @@ static char *pre_emph_names[] = {
+ 
+ /***** radeon AUX functions *****/
+ 
+-/* Atom needs data in little endian format
+- * so swap as appropriate when copying data to
+- * or from atom. Note that atom operates on
+- * dw units.
++/* Atom needs data in little endian format so swap as appropriate when copying
++ * data to or from atom. Note that atom operates on dw units.
++ *
++ * Use to_le=true when sending data to atom and provide at least
++ * ALIGN(num_bytes,4) bytes in the dst buffer.
++ *
++ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
++ * bytes in the src buffer.
+  */
+ void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
+ {
+ #ifdef __BIG_ENDIAN
+-      u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
+-      u32 *dst32, *src32;
++      u32 src_tmp[5], dst_tmp[5];
+       int i;
++      u8 align_num_bytes = ALIGN(num_bytes, 4);
+ 
+-      memcpy(src_tmp, src, num_bytes);
+-      src32 = (u32 *)src_tmp;
+-      dst32 = (u32 *)dst_tmp;
+       if (to_le) {
+-              for (i = 0; i < ((num_bytes + 3) / 4); i++)
+-                      dst32[i] = cpu_to_le32(src32[i]);
+-              memcpy(dst, dst_tmp, num_bytes);
++              memcpy(src_tmp, src, num_bytes);
++              for (i = 0; i < align_num_bytes / 4; i++)
++                      dst_tmp[i] = cpu_to_le32(src_tmp[i]);
++              memcpy(dst, dst_tmp, align_num_bytes);
+       } else {
+-              u8 dws = num_bytes & ~3;
+-              for (i = 0; i < ((num_bytes + 3) / 4); i++)
+-                      dst32[i] = le32_to_cpu(src32[i]);
+-              memcpy(dst, dst_tmp, dws);
+-              if (num_bytes % 4) {
+-                      for (i = 0; i < (num_bytes % 4); i++)
+-                              dst[dws+i] = dst_tmp[dws+i];
+-              }
++              memcpy(src_tmp, src, align_num_bytes);
++              for (i = 0; i < align_num_bytes / 4; i++)
++                      dst_tmp[i] = le32_to_cpu(src_tmp[i]);
++              memcpy(dst, dst_tmp, num_bytes);
+       }
+ #else
+       memcpy(dst, src, num_bytes);
+diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
+index 26da2f4d7b4f..a2937a693591 100644
+--- a/drivers/gpu/drm/radeon/radeon_fb.c
++++ b/drivers/gpu/drm/radeon/radeon_fb.c
+@@ -226,7 +226,6 @@ static int radeonfb_create(struct drm_fb_helper *helper,
+       }
+ 
+       info->par = rfbdev;
+-      info->skip_vt_switch = true;
+ 
+       ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
+       if (ret) {
+diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
+index ea47980949ef..4d46f2ce606f 100644
+--- a/drivers/md/bcache/alloc.c
++++ b/drivers/md/bcache/alloc.c
+@@ -479,7 +479,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
+               if (b == -1)
+                       goto err;
+ 
+-              k->ptr[i] = PTR(ca->buckets[b].gen,
++              k->ptr[i] = MAKE_PTR(ca->buckets[b].gen,
+                               bucket_to_sector(c, b),
+                               ca->sb.nr_this_dev);
+ 
+diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
+index 243de0bf15cd..4bf15182c4da 100644
+--- a/drivers/md/bcache/extents.c
++++ b/drivers/md/bcache/extents.c
+@@ -584,7 +584,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
+               return false;
+ 
+       for (i = 0; i < KEY_PTRS(l); i++)
+-              if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
++              if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+                   PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
+                       return false;
+ 
+diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
+index 29eba7219b01..6ed066a0e7c0 100644
+--- a/drivers/md/bcache/journal.c
++++ b/drivers/md/bcache/journal.c
+@@ -508,7 +508,7 @@ static void journal_reclaim(struct cache_set *c)
+                       continue;
+ 
+               ja->cur_idx = next;
+-              k->ptr[n++] = PTR(0,
++              k->ptr[n++] = MAKE_PTR(0,
+                                 bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
+                                 ca->sb.nr_this_dev);
+       }
+diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
+index 5d7c0900fa1b..f112c5bc082a 100644
+--- a/drivers/misc/eeprom/at24.c
++++ b/drivers/misc/eeprom/at24.c
+@@ -257,6 +257,9 @@ static ssize_t at24_read(struct at24_data *at24,
+       if (unlikely(!count))
+               return count;
+ 
++      if (off + count > at24->chip.byte_len)
++              return -EINVAL;
++
+       /*
+        * Read data from chip, protecting against concurrent updates
+        * from this host, but not from other I2C masters.
+@@ -311,6 +314,9 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf,
+       unsigned long timeout, write_time;
+       unsigned next_page;
+ 
++      if (offset + count > at24->chip.byte_len)
++              return -EINVAL;
++
+       /* Get corresponding I2C address and adjust offset */
+       client = at24_translate_offset(at24, &offset);
+ 
+diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
+index 972ff844cf5a..cf7c7bc1e940 100644
+--- a/drivers/mmc/core/bus.c
++++ b/drivers/mmc/core/bus.c
+@@ -155,6 +155,9 @@ static int mmc_bus_suspend(struct device *dev)
+               return ret;
+ 
+       ret = host->bus_ops->suspend(host);
++      if (ret)
++              pm_generic_resume(dev);
++
+       return ret;
+ }
+ 
+diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
+index 54ab48827258..7ba109e8cf88 100644
+--- a/drivers/mtd/nand/nand_base.c
++++ b/drivers/mtd/nand/nand_base.c
+@@ -2663,15 +2663,18 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len,
+                           size_t *retlen, const uint8_t *buf)
+ {
+       struct nand_chip *chip = mtd->priv;
++      int chipnr = (int)(to >> chip->chip_shift);
+       struct mtd_oob_ops ops;
+       int ret;
+ 
+-      /* Wait for the device to get ready */
+-      panic_nand_wait(mtd, chip, 400);
+-
+       /* Grab the device */
+       panic_nand_get_device(chip, mtd, FL_WRITING);
+ 
++      chip->select_chip(mtd, chipnr);
++
++      /* Wait for the device to get ready */
++      panic_nand_wait(mtd, chip, 400);
++
+       memset(&ops, 0, sizeof(ops));
+       ops.len = len;
+       ops.datbuf = (uint8_t *)buf;
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index c36a03fa7678..260f94b019c9 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3361,13 +3361,6 @@ again:
+               goto again;
+       }
+ 
+-      /* We've already setup this transaction, go ahead and exit */
+-      if (block_group->cache_generation == trans->transid &&
+-          i_size_read(inode)) {
+-              dcs = BTRFS_DC_SETUP;
+-              goto out_put;
+-      }
+-
+       /*
+        * We want to set the generation to 0, that way if anything goes wrong
+        * from here on out we know not to trust this cache when we load up next
+@@ -3391,6 +3384,13 @@ again:
+       }
+       WARN_ON(ret);
+ 
++      /* We've already setup this transaction, go ahead and exit */
++      if (block_group->cache_generation == trans->transid &&
++          i_size_read(inode)) {
++              dcs = BTRFS_DC_SETUP;
++              goto out_put;
++      }
++
+       if (i_size_read(inode) > 0) {
+               ret = btrfs_check_trunc_cache_free_space(root,
+                                       &root->fs_info->global_block_rsv);
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 348e0a05bd18..44e09483d2cd 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1260,7 +1260,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
+               return 0;
+       }
+ 
+-      error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
++      error = nfs_lookup_verify_inode(inode, flags);
+       dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
+                       __func__, inode->i_ino, error ? "invalid" : "valid");
+       return !error;
+@@ -1420,6 +1420,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
+ 
+ const struct dentry_operations nfs4_dentry_operations = {
+       .d_revalidate   = nfs4_lookup_revalidate,
++      .d_weak_revalidate      = nfs_weak_revalidate,
+       .d_delete       = nfs_dentry_delete,
+       .d_iput         = nfs_dentry_iput,
+       .d_automount    = nfs_d_automount,
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 421935f3d909..11c67e8b939d 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -3379,7 +3379,9 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
+               /* ignore lock owners */
+               if (local->st_stateowner->so_is_open_owner == 0)
+                       continue;
+-              if (local->st_stateowner == &oo->oo_owner) {
++              if (local->st_stateowner != &oo->oo_owner)
++                      continue;
++              if (local->st_stid.sc_type == NFS4_OPEN_STID) {
+                       ret = local;
+                       atomic_inc(&ret->st_stid.sc_count);
+                       break;
+@@ -3388,6 +3390,52 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
+       return ret;
+ }
+ 
++static __be32
++nfsd4_verify_open_stid(struct nfs4_stid *s)
++{
++      __be32 ret = nfs_ok;
++
++      switch (s->sc_type) {
++      default:
++              break;
++      case NFS4_CLOSED_STID:
++      case NFS4_CLOSED_DELEG_STID:
++              ret = nfserr_bad_stateid;
++              break;
++      case NFS4_REVOKED_DELEG_STID:
++              ret = nfserr_deleg_revoked;
++      }
++      return ret;
++}
++
++/* Lock the stateid st_mutex, and deal with races with CLOSE */
++static __be32
++nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
++{
++      __be32 ret;
++
++      mutex_lock(&stp->st_mutex);
++      ret = nfsd4_verify_open_stid(&stp->st_stid);
++      if (ret != nfs_ok)
++              mutex_unlock(&stp->st_mutex);
++      return ret;
++}
++
++static struct nfs4_ol_stateid *
++nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
++{
++      struct nfs4_ol_stateid *stp;
++      for (;;) {
++              spin_lock(&fp->fi_lock);
++              stp = nfsd4_find_existing_open(fp, open);
++              spin_unlock(&fp->fi_lock);
++              if (!stp || nfsd4_lock_ol_stateid(stp) == nfs_ok)
++                      break;
++              nfs4_put_stid(&stp->st_stid);
++      }
++      return stp;
++}
++
+ static struct nfs4_openowner *
+ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
+                          struct nfsd4_compound_state *cstate)
+@@ -3420,23 +3468,27 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
+ }
+ 
+ static struct nfs4_ol_stateid *
+-init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
+-              struct nfsd4_open *open)
++init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
+ {
+ 
+       struct nfs4_openowner *oo = open->op_openowner;
+       struct nfs4_ol_stateid *retstp = NULL;
++      struct nfs4_ol_stateid *stp;
+ 
++      stp = open->op_stp;
+       /* We are moving these outside of the spinlocks to avoid the warnings */
+       mutex_init(&stp->st_mutex);
+       mutex_lock(&stp->st_mutex);
+ 
++retry:
+       spin_lock(&oo->oo_owner.so_client->cl_lock);
+       spin_lock(&fp->fi_lock);
+ 
+       retstp = nfsd4_find_existing_open(fp, open);
+       if (retstp)
+               goto out_unlock;
++
++      open->op_stp = NULL;
+       atomic_inc(&stp->st_stid.sc_count);
+       stp->st_stid.sc_type = NFS4_OPEN_STID;
+       INIT_LIST_HEAD(&stp->st_locks);
+@@ -3453,11 +3505,16 @@ out_unlock:
+       spin_unlock(&fp->fi_lock);
+       spin_unlock(&oo->oo_owner.so_client->cl_lock);
+       if (retstp) {
+-              mutex_lock(&retstp->st_mutex);
+-              /* Not that we need to, just for neatness */
++              /* Handle races with CLOSE */
++              if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
++                      nfs4_put_stid(&retstp->st_stid);
++                      goto retry;
++              }
++              /* To keep mutex tracking happy */
+               mutex_unlock(&stp->st_mutex);
++              stp = retstp;
+       }
+-      return retstp;
++      return stp;
+ }
+ 
+ /*
+@@ -4260,9 +4317,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+       struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
+       struct nfs4_file *fp = NULL;
+       struct nfs4_ol_stateid *stp = NULL;
+-      struct nfs4_ol_stateid *swapstp = NULL;
+       struct nfs4_delegation *dp = NULL;
+       __be32 status;
++      bool new_stp = false;
+ 
+       /*
+        * Lookup file; if found, lookup stateid and check open request,
+@@ -4274,9 +4331,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+               status = nfs4_check_deleg(cl, open, &dp);
+               if (status)
+                       goto out;
+-              spin_lock(&fp->fi_lock);
+-              stp = nfsd4_find_existing_open(fp, open);
+-              spin_unlock(&fp->fi_lock);
++              stp = nfsd4_find_and_lock_existing_open(fp, open);
+       } else {
+               open->op_file = NULL;
+               status = nfserr_bad_stateid;
+@@ -4284,41 +4339,31 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+                       goto out;
+       }
+ 
++      if (!stp) {
++              stp = init_open_stateid(fp, open);
++              if (!open->op_stp)
++                      new_stp = true;
++      }
++
+       /*
+        * OPEN the file, or upgrade an existing OPEN.
+        * If truncate fails, the OPEN fails.
++       *
++       * stp is already locked.
+        */
+-      if (stp) {
++      if (!new_stp) {
+               /* Stateid was found, this is an OPEN upgrade */
+-              mutex_lock(&stp->st_mutex);
+               status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
+               if (status) {
+                       mutex_unlock(&stp->st_mutex);
+                       goto out;
+               }
+       } else {
+-              stp = open->op_stp;
+-              open->op_stp = NULL;
+-              /*
+-               * init_open_stateid() either returns a locked stateid
+-               * it found, or initializes and locks the new one we passed in
+-               */
+-              swapstp = init_open_stateid(stp, fp, open);
+-              if (swapstp) {
+-                      nfs4_put_stid(&stp->st_stid);
+-                      stp = swapstp;
+-                      status = nfs4_upgrade_open(rqstp, fp, current_fh,
+-                                              stp, open);
+-                      if (status) {
+-                              mutex_unlock(&stp->st_mutex);
+-                              goto out;
+-                      }
+-                      goto upgrade_out;
+-              }
+               status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
+               if (status) {
+-                      mutex_unlock(&stp->st_mutex);
++                      stp->st_stid.sc_type = NFS4_CLOSED_STID;
+                       release_open_stateid(stp);
++                      mutex_unlock(&stp->st_mutex);
+                       goto out;
+               }
+ 
+@@ -4327,7 +4372,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+               if (stp->st_clnt_odstate == open->op_odstate)
+                       open->op_odstate = NULL;
+       }
+-upgrade_out:
++
+       nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
+       mutex_unlock(&stp->st_mutex);
+ 
+@@ -5153,7 +5198,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+       bool unhashed;
+       LIST_HEAD(reaplist);
+ 
+-      s->st_stid.sc_type = NFS4_CLOSED_STID;
+       spin_lock(&clp->cl_lock);
+       unhashed = unhash_open_stateid(s, &reaplist);
+ 
+@@ -5192,10 +5236,12 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+       nfsd4_bump_seqid(cstate, status);
+       if (status)
+               goto out; 
++
++      stp->st_stid.sc_type = NFS4_CLOSED_STID;
+       nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
+-      mutex_unlock(&stp->st_mutex);
+ 
+       nfsd4_close_open_stateid(stp);
++      mutex_unlock(&stp->st_mutex);
+ 
+       /* put reference from nfs4_preprocess_seqid_op */
+       nfs4_put_stid(&stp->st_stid);
+diff --git a/include/linux/netlink.h b/include/linux/netlink.h
+index 639e9b8b0e4d..0b41959aab9f 100644
+--- a/include/linux/netlink.h
++++ b/include/linux/netlink.h
+@@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
+ struct netlink_callback {
+       struct sk_buff          *skb;
+       const struct nlmsghdr   *nlh;
++      int                     (*start)(struct netlink_callback *);
+       int                     (*dump)(struct sk_buff * skb,
+                                       struct netlink_callback *cb);
+       int                     (*done)(struct netlink_callback *cb);
+@@ -153,6 +154,7 @@ struct nlmsghdr *
+ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags);
+ 
+ struct netlink_dump_control {
++      int (*start)(struct netlink_callback *);
+       int (*dump)(struct sk_buff *skb, struct netlink_callback *);
+       int (*done)(struct netlink_callback *);
+       void *data;
+diff --git a/include/net/genetlink.h b/include/net/genetlink.h
+index 1b6b6dcb018d..43c0e771f417 100644
+--- a/include/net/genetlink.h
++++ b/include/net/genetlink.h
+@@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
+  * @flags: flags
+  * @policy: attribute validation policy
+  * @doit: standard command callback
++ * @start: start callback for dumps
+  * @dumpit: callback for dumpers
+  * @done: completion callback for dumps
+  * @ops_list: operations list
+@@ -122,6 +123,7 @@ struct genl_ops {
+       const struct nla_policy *policy;
+       int                    (*doit)(struct sk_buff *skb,
+                                      struct genl_info *info);
++      int                    (*start)(struct netlink_callback *cb);
+       int                    (*dumpit)(struct sk_buff *skb,
+                                        struct netlink_callback *cb);
+       int                    (*done)(struct netlink_callback *cb);
+diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
+index 22b6ad31c706..8562b1cb776b 100644
+--- a/include/uapi/linux/bcache.h
++++ b/include/uapi/linux/bcache.h
+@@ -90,7 +90,7 @@ PTR_FIELD(PTR_GEN,                   0,  8)
+ 
+ #define PTR_CHECK_DEV                 ((1 << PTR_DEV_BITS) - 1)
+ 
+-#define PTR(gen, offset, dev)                                         \
++#define MAKE_PTR(gen, offset, dev)                                    \
+       ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)
+ 
+ /* Bkey utility code */
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 6c6f5ccfcda1..8f3769ec8575 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1304,17 +1304,11 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+       VM_BUG_ON_PAGE(!PageHead(page), page);
+       if (flags & FOLL_TOUCH) {
+               pmd_t _pmd;
+-              /*
+-               * We should set the dirty bit only for FOLL_WRITE but
+-               * for now the dirty bit in the pmd is meaningless.
+-               * And if the dirty bit will become meaningful and
+-               * we'll only set it with FOLL_WRITE, an atomic
+-               * set_bit will be required on the pmd to set the
+-               * young bit, instead of the current set_pmd_at.
+-               */
+-              _pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
++              _pmd = pmd_mkyoung(*pmd);
++              if (flags & FOLL_WRITE)
++                      _pmd = pmd_mkdirty(_pmd);
+               if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
+-                                        pmd, _pmd,  1))
++                                        pmd, _pmd, flags & FOLL_WRITE))
+                       update_mmu_cache_pmd(vma, addr, pmd);
+       }
+       if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
+diff --git a/mm/madvise.c b/mm/madvise.c
+index c889fcbb530e..2a0f9a4504f1 100644
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -223,15 +223,14 @@ static long madvise_willneed(struct vm_area_struct *vma,
+ {
+       struct file *file = vma->vm_file;
+ 
++      *prev = vma;
+ #ifdef CONFIG_SWAP
+       if (!file) {
+-              *prev = vma;
+               force_swapin_readahead(vma, start, end);
+               return 0;
+       }
+ 
+       if (shmem_mapping(file->f_mapping)) {
+-              *prev = vma;
+               force_shm_swapin_readahead(vma, start, end,
+                                       file->f_mapping);
+               return 0;
+@@ -246,7 +245,6 @@ static long madvise_willneed(struct vm_area_struct *vma,
+               return 0;
+       }
+ 
+-      *prev = vma;
+       start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+       if (end > vma->vm_end)
+               end = vma->vm_end;
+diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+index 9ecdd61c6463..a87afc4f3c91 100644
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2203,6 +2203,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+ 
+       cb = &nlk->cb;
+       memset(cb, 0, sizeof(*cb));
++      cb->start = control->start;
+       cb->dump = control->dump;
+       cb->done = control->done;
+       cb->nlh = nlh;
+@@ -2216,6 +2217,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+ 
+       mutex_unlock(nlk->cb_mutex);
+ 
++      if (cb->start)
++              cb->start(cb);
++
+       ret = netlink_dump(sk);
+       sock_put(sk);
+ 
+diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
+index bc0e504f33a6..8e63662c6fb0 100644
+--- a/net/netlink/genetlink.c
++++ b/net/netlink/genetlink.c
+@@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
+ }
+ EXPORT_SYMBOL(genlmsg_put);
+ 
++static int genl_lock_start(struct netlink_callback *cb)
++{
++      /* our ops are always const - netlink API doesn't propagate that */
++      const struct genl_ops *ops = cb->data;
++      int rc = 0;
++
++      if (ops->start) {
++              genl_lock();
++              rc = ops->start(cb);
++              genl_unlock();
++      }
++      return rc;
++}
++
+ static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+       /* our ops are always const - netlink API doesn't propagate that */
+@@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
+                               .module = family->module,
+                               /* we have const, but the netlink API doesn't */
+                               .data = (void *)ops,
++                              .start = genl_lock_start,
+                               .dump = genl_lock_dumpit,
+                               .done = genl_lock_done,
+                       };
+@@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
+               } else {
+                       struct netlink_dump_control c = {
+                               .module = family->module,
++                              .start = ops->start,
+                               .dump = ops->dumpit,
+                               .done = ops->done,
+                       };
+diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
+index 7a5a64e70b4d..76944a4839a5 100644
+--- a/net/xfrm/xfrm_user.c
++++ b/net/xfrm/xfrm_user.c
+@@ -1652,32 +1652,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
+ 
+ static int xfrm_dump_policy_done(struct netlink_callback *cb)
+ {
+-      struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
++      struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
+       struct net *net = sock_net(cb->skb->sk);
+ 
+       xfrm_policy_walk_done(walk, net);
+       return 0;
+ }
+ 
++static int xfrm_dump_policy_start(struct netlink_callback *cb)
++{
++      struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
++
++      BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args));
++
++      xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
++      return 0;
++}
++
+ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+       struct net *net = sock_net(skb->sk);
+-      struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
++      struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
+       struct xfrm_dump_info info;
+ 
+-      BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) >
+-                   sizeof(cb->args) - sizeof(cb->args[0]));
+-
+       info.in_skb = cb->skb;
+       info.out_skb = skb;
+       info.nlmsg_seq = cb->nlh->nlmsg_seq;
+       info.nlmsg_flags = NLM_F_MULTI;
+ 
+-      if (!cb->args[0]) {
+-              cb->args[0] = 1;
+-              xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
+-      }
+-
+       (void) xfrm_policy_walk(net, walk, dump_one_policy, &info);
+ 
+       return skb->len;
+@@ -2415,6 +2417,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
+ 
+ static const struct xfrm_link {
+       int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
++      int (*start)(struct netlink_callback *);
+       int (*dump)(struct sk_buff *, struct netlink_callback *);
+       int (*done)(struct netlink_callback *);
+       const struct nla_policy *nla_pol;
+@@ -2428,6 +2431,7 @@ static const struct xfrm_link {
+       [XFRM_MSG_NEWPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_add_policy    },
+       [XFRM_MSG_DELPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_get_policy    },
+       [XFRM_MSG_GETPOLICY   - XFRM_MSG_BASE] = { .doit = xfrm_get_policy,
++                                                 .start = xfrm_dump_policy_start,
+                                                  .dump = xfrm_dump_policy,
+                                                  .done = xfrm_dump_policy_done },
+       [XFRM_MSG_ALLOCSPI    - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi },
+@@ -2479,6 +2483,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+ 
+               {
+                       struct netlink_dump_control c = {
++                              .start = link->start,
+                               .dump = link->dump,
+                               .done = link->done,
+                       };

Reply via email to