With collapse scanning moved to processes, we can remove a lot of code from
khugepaged, mostly related to the maintenance of mm_slots, which khugepaged
used to track which mm's to scan.

We keep the hooks for vma operations such as khugepaged_enter(), but only to
set the MMF_VM_HUGEPAGE bit, which enables scanning for the given mm.

Signed-off-by: Vlastimil Babka <[email protected]>
---
 include/linux/khugepaged.h |  14 +---
 kernel/fork.c              |   1 -
 mm/huge_memory.c           | 193 +--------------------------------------------
 3 files changed, 3 insertions(+), 205 deletions(-)

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 51b2cc5..5af0f35 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -31,16 +31,10 @@ extern bool khugepaged_scan_mm(struct mm_struct *mm,
 static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
        if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags))
-               return __khugepaged_enter(mm);
+               set_bit(MMF_VM_HUGEPAGE, &mm->flags);
        return 0;
 }
 
-static inline void khugepaged_exit(struct mm_struct *mm)
-{
-       if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
-               __khugepaged_exit(mm);
-}
-
 static inline int khugepaged_enter(struct vm_area_struct *vma,
                                   unsigned long vm_flags)
 {
@@ -48,8 +42,7 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
                if ((khugepaged_always() ||
                     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
                    !(vm_flags & VM_NOHUGEPAGE))
-                       if (__khugepaged_enter(vma->vm_mm))
-                               return -ENOMEM;
+                       set_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags);
        return 0;
 }
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -57,9 +50,6 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
        return 0;
 }
-static inline void khugepaged_exit(struct mm_struct *mm)
-{
-}
 static inline int khugepaged_enter(struct vm_area_struct *vma,
                                   unsigned long vm_flags)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139..5541a9f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -659,7 +659,6 @@ void mmput(struct mm_struct *mm)
                uprobe_clear_state(mm);
                exit_aio(mm);
                ksm_exit(mm);
-               khugepaged_exit(mm); /* must run before exit_mmap */
                exit_mmap(mm);
                set_mm_exe_file(mm, NULL);
                if (!list_empty(&mm->mmlist)) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9172c7f..f497e6b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -56,7 +56,6 @@ unsigned int khugepaged_scan_sleep_millisecs __read_mostly = 10000;
 static unsigned int khugepaged_alloc_sleep_millisecs __read_mostly = 60000;
 static struct task_struct *khugepaged_thread __read_mostly;
 static DEFINE_MUTEX(khugepaged_mutex);
-static DEFINE_SPINLOCK(khugepaged_mm_lock);
 static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
 /*
  * default collapse hugepages if there is at least one pte mapped like
@@ -66,41 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
 static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
 
 static int khugepaged(void *none);
-static int khugepaged_slab_init(void);
 
-#define MM_SLOTS_HASH_BITS 10
-static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
-
-static struct kmem_cache *mm_slot_cache __read_mostly;
-
-/**
- * struct mm_slot - hash lookup from mm to mm_slot
- * @hash: hash collision list
- * @mm_node: khugepaged scan list headed in khugepaged_scan.mm_head
- * @mm: the mm that this information is valid for
- */
-struct mm_slot {
-       struct hlist_node hash;
-       struct list_head mm_node;
-       struct mm_struct *mm;
-};
-
-/**
- * struct khugepaged_scan - cursor for scanning
- * @mm_head: the head of the mm list to scan
- * @mm_slot: the current mm_slot we are scanning
- * @address: the next address inside that to be scanned
- *
- * There is only the one khugepaged_scan instance of this cursor structure.
- */
-struct khugepaged_scan {
-       struct list_head mm_head;
-       struct mm_slot *mm_slot;
-       unsigned long address;
-};
-static struct khugepaged_scan khugepaged_scan = {
-       .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
-};
 static nodemask_t thp_avail_nodes = NODE_MASK_ALL;
 
 static int set_recommended_min_free_kbytes(void)
@@ -601,21 +566,12 @@ delete_obj:
        return err;
 }
 
-static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
-{
-       sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group);
-       sysfs_remove_group(hugepage_kobj, &hugepage_attr_group);
-       kobject_put(hugepage_kobj);
-}
 #else
 static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
 {
        return 0;
 }
 
-static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
-{
-}
 #endif /* CONFIG_SYSFS */
 
 static int __init hugepage_init(void)
@@ -632,10 +588,6 @@ static int __init hugepage_init(void)
        if (err)
                return err;
 
-       err = khugepaged_slab_init();
-       if (err)
-               goto out;
-
        register_shrinker(&huge_zero_page_shrinker);
 
        /*
@@ -649,9 +601,6 @@ static int __init hugepage_init(void)
        start_khugepaged();
 
        return 0;
-out:
-       hugepage_exit_sysfs(hugepage_kobj);
-       return err;
 }
 subsys_initcall(hugepage_init);
 
@@ -1979,83 +1928,6 @@ int hugepage_madvise(struct vm_area_struct *vma,
        return 0;
 }
 
-static int __init khugepaged_slab_init(void)
-{
-       mm_slot_cache = kmem_cache_create("khugepaged_mm_slot",
-                                         sizeof(struct mm_slot),
-                                         __alignof__(struct mm_slot), 0, NULL);
-       if (!mm_slot_cache)
-               return -ENOMEM;
-
-       return 0;
-}
-
-static inline struct mm_slot *alloc_mm_slot(void)
-{
-       if (!mm_slot_cache)     /* initialization failed */
-               return NULL;
-       return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
-}
-
-static inline void free_mm_slot(struct mm_slot *mm_slot)
-{
-       kmem_cache_free(mm_slot_cache, mm_slot);
-}
-
-static struct mm_slot *get_mm_slot(struct mm_struct *mm)
-{
-       struct mm_slot *mm_slot;
-
-       hash_for_each_possible(mm_slots_hash, mm_slot, hash, (unsigned long)mm)
-               if (mm == mm_slot->mm)
-                       return mm_slot;
-
-       return NULL;
-}
-
-static void insert_to_mm_slots_hash(struct mm_struct *mm,
-                                   struct mm_slot *mm_slot)
-{
-       mm_slot->mm = mm;
-       hash_add(mm_slots_hash, &mm_slot->hash, (long)mm);
-}
-
-static inline int khugepaged_test_exit(struct mm_struct *mm)
-{
-       return atomic_read(&mm->mm_users) == 0;
-}
-
-int __khugepaged_enter(struct mm_struct *mm)
-{
-       struct mm_slot *mm_slot;
-       int wakeup;
-
-       mm_slot = alloc_mm_slot();
-       if (!mm_slot)
-               return -ENOMEM;
-
-       /* __khugepaged_exit() must not run from under us */
-       VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
-       if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
-               free_mm_slot(mm_slot);
-               return 0;
-       }
-
-       spin_lock(&khugepaged_mm_lock);
-       insert_to_mm_slots_hash(mm, mm_slot);
-       /*
-        * Insert just behind the scanning cursor, to let the area settle
-        * down a little.
-        */
-       wakeup = list_empty(&khugepaged_scan.mm_head);
-       list_add_tail(&mm_slot->mm_node, &khugepaged_scan.mm_head);
-       spin_unlock(&khugepaged_mm_lock);
-
-       atomic_inc(&mm->mm_count);
-
-       return 0;
-}
-
 int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
                               unsigned long vm_flags)
 {
@@ -2077,38 +1949,6 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
        return 0;
 }
 
-void __khugepaged_exit(struct mm_struct *mm)
-{
-       struct mm_slot *mm_slot;
-       int free = 0;
-
-       spin_lock(&khugepaged_mm_lock);
-       mm_slot = get_mm_slot(mm);
-       if (mm_slot && khugepaged_scan.mm_slot != mm_slot) {
-               hash_del(&mm_slot->hash);
-               list_del(&mm_slot->mm_node);
-               free = 1;
-       }
-       spin_unlock(&khugepaged_mm_lock);
-
-       if (free) {
-               clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
-               free_mm_slot(mm_slot);
-               mmdrop(mm);
-       } else if (mm_slot) {
-               /*
-                * This is required to serialize against
-                * khugepaged_test_exit() (which is guaranteed to run
-                * under mmap sem read mode). Stop here (after we
-                * return all pagetables will be destroyed) until
-                * khugepaged has finished working on the pagetables
-                * under the mmap_sem.
-                */
-               down_write(&mm->mmap_sem);
-               up_write(&mm->mmap_sem);
-       }
-}
-
 static void release_pte_page(struct page *page)
 {
        /* 0 stands for page_is_file_cache(page) == false */
@@ -2450,8 +2290,7 @@ static void collapse_huge_page(struct mm_struct *mm,
         * handled by the anon_vma lock + PG_lock.
         */
        down_write(&mm->mmap_sem);
-       if (unlikely(khugepaged_test_exit(mm)))
-               goto out;
+       VM_BUG_ON(atomic_read(&mm->mm_users) == 0);
 
        vma = find_vma(mm, address);
        if (!vma)
@@ -2629,29 +2468,6 @@ out:
        return ret;
 }
 
-static void collect_mm_slot(struct mm_slot *mm_slot)
-{
-       struct mm_struct *mm = mm_slot->mm;
-
-       VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock));
-
-       if (khugepaged_test_exit(mm)) {
-               /* free mm_slot */
-               hash_del(&mm_slot->hash);
-               list_del(&mm_slot->mm_node);
-
-               /*
-                * Not strictly needed because the mm exited already.
-                *
-                * clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
-                */
-
-               /* khugepaged_mm_lock actually not necessary for the below */
-               free_mm_slot(mm_slot);
-               mmdrop(mm);
-       }
-}
-
 bool khugepaged_scan_mm(struct mm_struct *mm, unsigned long *start, long pages)
 {
        struct vm_area_struct *vma;
@@ -2750,7 +2566,6 @@ static void khugepaged_wait_work(bool did_alloc)
 
 static int khugepaged(void *none)
 {
-       struct mm_slot *mm_slot;
        bool did_alloc;
 
        set_freezable();
@@ -2761,12 +2576,6 @@ static int khugepaged(void *none)
                khugepaged_wait_work(did_alloc);
        }
 
-       spin_lock(&khugepaged_mm_lock);
-       mm_slot = khugepaged_scan.mm_slot;
-       khugepaged_scan.mm_slot = NULL;
-       if (mm_slot)
-               collect_mm_slot(mm_slot);
-       spin_unlock(&khugepaged_mm_lock);
        return 0;
 }
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to