collapse_file() now supports collapsing clean pagecache folios from
writable files, so add corresponding tests.

Note that madvise(MADV_COLLAPSE) works for dirty pagecache folios from
writable files, because collapse_single_pmd() triggers a synchronous
writeback when first attempt of collapse_file() fails.  That writeback
makes dirty folios clean and the retry of collapse_file() succeeds.

Signed-off-by: Zi Yan <[email protected]>
Cc: Al Viro <[email protected]>
Cc: Baolin Wang <[email protected]>
Cc: Barry Song <[email protected]>
Cc: Chris Mason <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: David Hildenbrand (Arm) <[email protected]>
Cc: David Sterba <[email protected]>
Cc: Dev Jain <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Lance Yang <[email protected]>
Cc: Liam Howlett <[email protected]>
Cc: Lorenzo Stoakes <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Nico Pache <[email protected]>
Cc: Ryan Roberts <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: Song Liu <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Vlastimil Babka <[email protected]>
---
 tools/testing/selftests/mm/khugepaged.c | 111 ++++++++++++++++++------
 1 file changed, 85 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/mm/khugepaged.c 
b/tools/testing/selftests/mm/khugepaged.c
index 7ee6dfe93b3d9..9ce0a11b461df 100644
--- a/tools/testing/selftests/mm/khugepaged.c
+++ b/tools/testing/selftests/mm/khugepaged.c
@@ -41,6 +41,12 @@ enum vma_type {
        VMA_SHMEM,
 };
 
+enum file_setup_ops {
+       FILE_SETUP_READ_ONLY_FS,
+       FILE_SETUP_READ_WRITE_FS_READ_DATA,
+       FILE_SETUP_READ_WRITE_FS_WRITE_DATA,
+};
+
 struct mem_ops {
        void *(*setup_area)(int nr_hpages);
        void (*cleanup_area)(void *p, unsigned long size);
@@ -50,7 +56,8 @@ struct mem_ops {
 };
 
 static struct mem_ops *read_only_file_ops;
-static struct mem_ops *read_write_file_ops;
+static struct mem_ops *read_write_file_read_ops;
+static struct mem_ops *read_write_file_write_ops;
 static struct mem_ops *anon_ops;
 static struct mem_ops *shmem_ops;
 
@@ -113,7 +120,8 @@ static void restore_settings(int sig)
 static void save_settings(void)
 {
        printf("Save THP and khugepaged settings...");
-       if ((read_only_file_ops || read_write_file_ops) &&
+       if ((read_only_file_ops || read_write_file_read_ops ||
+            read_write_file_write_ops) &&
            finfo.type == VMA_FILE)
                thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path);
        thp_save_settings();
@@ -366,10 +374,10 @@ static bool anon_check_huge(void *addr, int nr_hpages)
        return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
 }
 
-static void *file_setup_area_common(int nr_hpages, bool read_only)
+static void *file_setup_area_common(int nr_hpages, enum file_setup_ops setup)
 {
-       const int open_opt = read_only ? O_RDONLY : O_RDWR;
-       const int mmap_prot = read_only ? PROT_READ : (PROT_READ | PROT_WRITE);
+       const int open_opt = setup == FILE_SETUP_READ_ONLY_FS ? O_RDONLY : 
O_RDWR;
+       const int mmap_prot = setup == FILE_SETUP_READ_ONLY_FS ? PROT_READ : 
(PROT_READ | PROT_WRITE);
        int fd;
        void *p;
        unsigned long size;
@@ -405,7 +413,10 @@ static void *file_setup_area_common(int nr_hpages, bool 
read_only)
        success("OK");
 
        printf("Opening %s %s for collapse...", finfo.path,
-              read_only ? "read-only" : "read-write");
+              setup == FILE_SETUP_READ_ONLY_FS ? "read-only" :
+              setup == FILE_SETUP_READ_WRITE_FS_READ_DATA ?
+                                                 "read-write (read)" :
+                                                 "read-write (write)");
        finfo.fd = open(finfo.path, open_opt, 777);
        if (finfo.fd < 0) {
                perror("open()");
@@ -425,12 +436,17 @@ static void *file_setup_area_common(int nr_hpages, bool 
read_only)
 
 static void *file_setup_read_only_area(int nr_hpages)
 {
-       return file_setup_area_common(nr_hpages, /* read_only= */ true);
+       return file_setup_area_common(nr_hpages, FILE_SETUP_READ_ONLY_FS);
+}
+
+static void *file_setup_read_write_fs_read_area(int nr_hpages)
+{
+       return file_setup_area_common(nr_hpages, 
FILE_SETUP_READ_WRITE_FS_READ_DATA);
 }
 
-static void *file_setup_read_write_area(int nr_hpages)
+static void *file_setup_read_write_fs_write_area(int nr_hpages)
 {
-       return file_setup_area_common(nr_hpages, /* read_only= */ false);
+       return file_setup_area_common(nr_hpages, 
FILE_SETUP_READ_WRITE_FS_WRITE_DATA);
 }
 
 static void file_cleanup_area(void *p, unsigned long size)
@@ -448,6 +464,16 @@ static void file_fault_read(void *p, unsigned long start, 
unsigned long end)
        }
 }
 
+static void file_fault_read_and_flush(void *p, unsigned long start, unsigned 
long end)
+{
+       file_fault_read(p, start, end);
+       /*
+        * make folio clean, since dirty folios from read&write file are
+        * rejected and not flushed
+        */
+       msync((char *)p + start, end - start, MS_SYNC);
+}
+
 static void file_fault_write(void *p, unsigned long start, unsigned long end)
 {
        if (madvise(((char *)p) + start, end - start, MADV_POPULATE_WRITE)) {
@@ -519,8 +545,16 @@ static struct mem_ops __read_only_file_ops = {
        .name = "file",
 };
 
-static struct mem_ops __read_write_file_ops = {
-       .setup_area = &file_setup_read_write_area,
+static struct mem_ops __read_write_file_read_ops = {
+       .setup_area = &file_setup_read_write_fs_read_area,
+       .cleanup_area = &file_cleanup_area,
+       .fault = &file_fault_read_and_flush,
+       .check_huge = &file_check_huge,
+       .name = "file",
+};
+
+static struct mem_ops __read_write_file_write_ops = {
+       .setup_area = &file_setup_read_write_fs_write_area,
        .cleanup_area = &file_cleanup_area,
        .fault = &file_fault_write,
        .check_huge = &file_check_huge,
@@ -537,7 +571,9 @@ static struct mem_ops __shmem_ops = {
 
 static bool is_tmpfs(struct mem_ops *ops)
 {
-       return (ops == &__read_only_file_ops || ops == &__read_write_file_ops) 
&&
+       return (ops == &__read_only_file_ops ||
+               ops == &__read_write_file_read_ops ||
+               ops == &__read_write_file_write_ops) &&
               finfo.type == VMA_SHMEM;
 }
 
@@ -554,9 +590,11 @@ static void __madvise_collapse(const char *msg, char *p, 
int nr_hpages,
 
        printf("%s...", msg);
 
-       /* read&write file collapse always fail */
-       if (!is_tmpfs(ops) && ops == &__read_write_file_ops)
-               expect = false;
+       /*
+        * read&write file collapse succeeds for MADV_COLLAPSE because dirty
+        * folios are written back after collapse fails for dirty folios and
+        * another collapse is attempted.
+        */
 
        /*
         * Prevent khugepaged interference and tests that MADV_COLLAPSE
@@ -624,8 +662,11 @@ static bool wait_for_scan(const char *msg, char *p, int 
nr_hpages,
 static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
                                struct mem_ops *ops, bool expect)
 {
-       /* read&write file collapse always fail */
-       if (!is_tmpfs(ops) && ops == &__read_write_file_ops)
+       /*
+        * read&write file collapse fails since khugepaged does not flush
+        * the target dirty folios
+        */
+       if (!is_tmpfs(ops) && ops == &__read_write_file_write_ops)
                expect = false;
 
        if (wait_for_scan(msg, p, nr_hpages, ops)) {
@@ -748,6 +789,9 @@ static void collapse_max_ptes_none(struct collapse_context 
*c, struct mem_ops *o
        validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * 
page_size);
 
        if (c->enforce_pte_scan_limits) {
+               ops->cleanup_area(p, hpage_pmd_size);
+               p = ops->setup_area(1);
+
                ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
                c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
                            true);
@@ -1195,21 +1239,24 @@ static void parse_test_type(int argc, char **argv)
 
        if (!strcmp(buf, "all")) {
                read_only_file_ops =  &__read_only_file_ops;
-               read_write_file_ops =  &__read_write_file_ops;
+               read_write_file_read_ops =  &__read_write_file_read_ops;
+               read_write_file_write_ops =  &__read_write_file_write_ops;
                anon_ops = &__anon_ops;
                shmem_ops = &__shmem_ops;
        } else if (!strcmp(buf, "anon")) {
                anon_ops = &__anon_ops;
        } else if (!strcmp(buf, "file")) {
                read_only_file_ops =  &__read_only_file_ops;
-               read_write_file_ops =  &__read_write_file_ops;
+               read_write_file_read_ops =  &__read_write_file_read_ops;
+               read_write_file_write_ops =  &__read_write_file_write_ops;
        } else if (!strcmp(buf, "shmem")) {
                shmem_ops = &__shmem_ops;
        } else {
                usage();
        }
 
-       if (!read_only_file_ops && !read_write_file_ops)
+       if (!read_only_file_ops && !read_write_file_read_ops &&
+           !read_write_file_write_ops)
                return;
 
        if (argc != 2)
@@ -1282,11 +1329,13 @@ int main(int argc, char **argv)
 
        TEST(collapse_full, khugepaged_context, anon_ops);
        TEST(collapse_full, khugepaged_context, read_only_file_ops);
-       TEST(collapse_full, khugepaged_context, read_write_file_ops);
+       TEST(collapse_full, khugepaged_context, read_write_file_read_ops);
+       TEST(collapse_full, khugepaged_context, read_write_file_write_ops);
        TEST(collapse_full, khugepaged_context, shmem_ops);
        TEST(collapse_full, madvise_context, anon_ops);
        TEST(collapse_full, madvise_context, read_only_file_ops);
-       TEST(collapse_full, madvise_context, read_write_file_ops);
+       TEST(collapse_full, madvise_context, read_write_file_read_ops);
+       TEST(collapse_full, madvise_context, read_write_file_write_ops);
        TEST(collapse_full, madvise_context, shmem_ops);
 
        TEST(collapse_empty, khugepaged_context, anon_ops);
@@ -1294,30 +1343,38 @@ int main(int argc, char **argv)
 
        TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
        TEST(collapse_single_pte_entry, khugepaged_context, read_only_file_ops);
-       TEST(collapse_single_pte_entry, khugepaged_context, 
read_write_file_ops);
+       TEST(collapse_single_pte_entry, khugepaged_context, 
read_write_file_read_ops);
+       TEST(collapse_single_pte_entry, khugepaged_context, 
read_write_file_write_ops);
        TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
        TEST(collapse_single_pte_entry, madvise_context, anon_ops);
        TEST(collapse_single_pte_entry, madvise_context, read_only_file_ops);
-       TEST(collapse_single_pte_entry, madvise_context, read_write_file_ops);
+       TEST(collapse_single_pte_entry, madvise_context, 
read_write_file_read_ops);
+       TEST(collapse_single_pte_entry, madvise_context, 
read_write_file_write_ops);
        TEST(collapse_single_pte_entry, madvise_context, shmem_ops);
 
        TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
        TEST(collapse_max_ptes_none, khugepaged_context, read_only_file_ops);
-       TEST(collapse_max_ptes_none, khugepaged_context, read_write_file_ops);
+       TEST(collapse_max_ptes_none, khugepaged_context, 
read_write_file_read_ops);
+       TEST(collapse_max_ptes_none, khugepaged_context, 
read_write_file_write_ops);
        TEST(collapse_max_ptes_none, madvise_context, anon_ops);
        TEST(collapse_max_ptes_none, madvise_context, read_only_file_ops);
-       TEST(collapse_max_ptes_none, madvise_context, read_write_file_ops);
+       TEST(collapse_max_ptes_none, madvise_context, read_write_file_read_ops);
+       TEST(collapse_max_ptes_none, madvise_context, 
read_write_file_write_ops);
 
        TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
        TEST(collapse_single_pte_entry_compound, khugepaged_context, 
read_only_file_ops);
+       TEST(collapse_single_pte_entry_compound, khugepaged_context, 
read_write_file_read_ops);
        TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
        TEST(collapse_single_pte_entry_compound, madvise_context, 
read_only_file_ops);
+       TEST(collapse_single_pte_entry_compound, madvise_context, 
read_write_file_read_ops);
 
        TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
        TEST(collapse_full_of_compound, khugepaged_context, read_only_file_ops);
+       TEST(collapse_full_of_compound, khugepaged_context, 
read_write_file_read_ops);
        TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
        TEST(collapse_full_of_compound, madvise_context, anon_ops);
        TEST(collapse_full_of_compound, madvise_context, read_only_file_ops);
+       TEST(collapse_full_of_compound, madvise_context, 
read_write_file_read_ops);
        TEST(collapse_full_of_compound, madvise_context, shmem_ops);
 
        TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
@@ -1340,9 +1397,11 @@ int main(int argc, char **argv)
 
        TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
        TEST(madvise_collapse_existing_thps, madvise_context, 
read_only_file_ops);
+       TEST(madvise_collapse_existing_thps, madvise_context, 
read_write_file_read_ops);
        TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);
 
        TEST(madvise_retracted_page_tables, madvise_context, 
read_only_file_ops);
+       TEST(madvise_retracted_page_tables, madvise_context, 
read_write_file_read_ops);
        TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);
 
        restore_settings(0);
-- 
2.53.0


Reply via email to