> 
> When migrating anonymous memory from system memory to device memory,
> CPU ptes are replaced with a special HMM swap entry so that page fault,
> get user pages (gup), fork, ... are properly redirected to HMM helpers.
> 
> This patch only adds the new swap entry type and hooks the HMM helper
> functions into the page fault and fork code paths.
> 
> Changed since v1:

But the subject line says this work is v11

>   - Fix name of HMM CPU page fault function.
> 
> Signed-off-by: Jérôme Glisse <[email protected]>
> Signed-off-by: Sherry Cheung <[email protected]>
> Signed-off-by: Subhash Gutti <[email protected]>
> Signed-off-by: Mark Hairgrove <[email protected]>
> Signed-off-by: John Hubbard <[email protected]>
> Signed-off-by: Jatin Kumar <[email protected]>
> ---
>  include/linux/hmm.h     | 34 ++++++++++++++++++++++++++++++++++
>  include/linux/swap.h    | 13 ++++++++++++-
>  include/linux/swapops.h | 43 ++++++++++++++++++++++++++++++++++++++++++-
>  mm/hmm.c                | 21 +++++++++++++++++++++
>  mm/memory.c             | 22 ++++++++++++++++++++++
>  5 files changed, 131 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 4bc132a..7c66513 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h

I find no hmm.h in 4.3-rc6

> @@ -272,6 +272,40 @@ void hmm_mirror_range_dirty(struct hmm_mirror *mirror,
>                           unsigned long start,
>                           unsigned long end);
> 
> +int hmm_handle_cpu_fault(struct mm_struct *mm,
> +                     struct vm_area_struct *vma,
> +                     pmd_t *pmdp, unsigned long addr,
> +                     unsigned flags, pte_t orig_pte);
> +
> +int hmm_mm_fork(struct mm_struct *src_mm,
> +             struct mm_struct *dst_mm,
> +             struct vm_area_struct *dst_vma,
> +             pmd_t *dst_pmd,
> +             unsigned long start,
> +             unsigned long end);
> +
> +#else /* CONFIG_HMM */
> +
> +static inline int hmm_handle_cpu_fault(struct mm_struct *mm,
> +                                    struct vm_area_struct *vma,
> +                                    pmd_t *pmdp, unsigned long addr,
> +                                    unsigned flags, pte_t orig_pte)
> +{
> +     return VM_FAULT_SIGBUS;
> +}
> +
> +static inline int hmm_mm_fork(struct mm_struct *src_mm,
> +                           struct mm_struct *dst_mm,
> +                           struct vm_area_struct *dst_vma,
> +                           pmd_t *dst_pmd,
> +                           unsigned long start,
> +                           unsigned long end)
> +{
> +     BUG();

s/BUG/BUILD_BUG/ ?

> +     return -ENOMEM;
> +}
> 
>  #endif /* CONFIG_HMM */
> +
> +
>  #endif
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7ba7dcc..5c8b871 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -70,8 +70,19 @@ static inline int current_is_kswapd(void)
>  #define SWP_HWPOISON_NUM 0
>  #endif
> 
> +/*
> + * HMM (heterogeneous memory management) used when data is in remote memory.
> + */
> +#ifdef CONFIG_HMM
> +#define SWP_HMM_NUM 1
> +#define SWP_HMM              (MAX_SWAPFILES + SWP_MIGRATION_NUM + SWP_HWPOISON_NUM)
> +#else
> +#define SWP_HMM_NUM 0
> +#endif
> +
>  #define MAX_SWAPFILES \
> -     ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
> +     ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - \
> +      SWP_HWPOISON_NUM - SWP_HMM_NUM)
> 
>  /*
>   * Magic header for a swap area. The first part of the union is
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 5c3a5f3..8c6ba9f 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -227,7 +227,7 @@ static inline void num_poisoned_pages_inc(void)
>  }
>  #endif
> 
> -#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
> +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || defined(CONFIG_HMM)
>  static inline int non_swap_entry(swp_entry_t entry)
>  {
>       return swp_type(entry) >= MAX_SWAPFILES;
> @@ -239,4 +239,45 @@ static inline int non_swap_entry(swp_entry_t entry)
>  }
>  #endif
> 
> +#ifdef CONFIG_HMM
> +static inline swp_entry_t make_hmm_entry(void)
> +{
> +     /* We do not store anything inside the CPU page table entry (pte). */

pte is clear enough, no?

> +     return swp_entry(SWP_HMM, 0);
> +}
> +
> +static inline swp_entry_t make_hmm_entry_locked(void)
> +{
> +     /* We do not store anything inside the CPU page table entry (pte). */
> +     return swp_entry(SWP_HMM, 1);
> +}
> +
> +static inline swp_entry_t make_hmm_entry_poisonous(void)
> +{
> +     /* We do not store anything inside the CPU page table entry (pte). */
> +     return swp_entry(SWP_HMM, 2);
> +}
> +
> +static inline int is_hmm_entry(swp_entry_t entry)
> +{
> +     return (swp_type(entry) == SWP_HMM);
> +}
> +
> +static inline int is_hmm_entry_locked(swp_entry_t entry)
> +{
> +     return (swp_type(entry) == SWP_HMM) && (swp_offset(entry) == 1);
> +}
> +
> +static inline int is_hmm_entry_poisonous(swp_entry_t entry)
> +{
> +     return (swp_type(entry) == SWP_HMM) && (swp_offset(entry) == 2);
> +}

So SWP_HMM_LOCKED and SWP_HMM_POISON should be defined.

> +#else /* CONFIG_HMM */
> +static inline int is_hmm_entry(swp_entry_t swp)
> +{
> +     return 0;
> +}
> +#endif /* CONFIG_HMM */
> +
> +
>  #endif /* _LINUX_SWAPOPS_H */
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 9e5017a..7fb493f 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -416,6 +416,27 @@ static struct mmu_notifier_ops hmm_notifier_ops = {
>  };
> 
> 
> +int hmm_handle_cpu_fault(struct mm_struct *mm,
> +                     struct vm_area_struct *vma,
> +                     pmd_t *pmdp, unsigned long addr,
> +                     unsigned flags, pte_t orig_pte)
> +{
> +     return VM_FAULT_SIGBUS;
> +}
> +EXPORT_SYMBOL(hmm_handle_cpu_fault);
> +
> +int hmm_mm_fork(struct mm_struct *src_mm,
> +             struct mm_struct *dst_mm,
> +             struct vm_area_struct *dst_vma,
> +             pmd_t *dst_pmd,
> +             unsigned long start,
> +             unsigned long end)
> +{
> +     return -ENOMEM;
> +}
> +EXPORT_SYMBOL(hmm_mm_fork);
> +
> +
>  struct mm_pt_iter {
>       struct mm_struct        *mm;
>       pte_t                   *ptep;
> diff --git a/mm/memory.c b/mm/memory.c
> index bbab5e9..08bc37e 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -53,6 +53,7 @@
>  #include <linux/writeback.h>
>  #include <linux/memcontrol.h>
>  #include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
>  #include <linux/kallsyms.h>
>  #include <linux/swapops.h>
>  #include <linux/elf.h>
> @@ -894,9 +895,11 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>       pte_t *orig_src_pte, *orig_dst_pte;
>       pte_t *src_pte, *dst_pte;
>       spinlock_t *src_ptl, *dst_ptl;
> +     unsigned cnt_hmm_entry = 0;

s/cnt_hmm_entry/hmm_ptes/ ?

>       int progress = 0;
>       int rss[NR_MM_COUNTERS];
>       swp_entry_t entry = (swp_entry_t){0};
> +     unsigned long start;
> 
>  again:
>       init_rss_vec(rss);
> @@ -910,6 +913,7 @@ again:
>       orig_src_pte = src_pte;
>       orig_dst_pte = dst_pte;
>       arch_enter_lazy_mmu_mode();
> +     start = addr;
> 
>       do {
>               /*
> @@ -926,6 +930,12 @@ again:
>                       progress++;
>                       continue;
>               }
> +             if (unlikely(!pte_present(*src_pte))) {
> +                     entry = pte_to_swp_entry(*src_pte);
> +
> +                     if (is_hmm_entry(entry))
> +                             cnt_hmm_entry++;
> +             }
>               entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>                                                       vma, addr, rss);
>               if (entry.val)
> @@ -940,6 +950,15 @@ again:
>       pte_unmap_unlock(orig_dst_pte, dst_ptl);
>       cond_resched();
> 
> +     if (cnt_hmm_entry) {
> +             int ret;
> +
> +             ret = hmm_mm_fork(src_mm, dst_mm, dst_vma,
> +                               dst_pmd, start, end);

Given start, s/end/addr/, no?

> +             if (ret)
> +                     return ret;
> +     }
> +
>       if (entry.val) {
>               if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
>                       return -ENOMEM;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to