Hello Ying Han,

You recently wrote a test program and posted to LKML as:

Subject: ftruncate-mmap: pages are lost after writing to mmaped file,
Date: Wed, 18 Mar 2009 12:44:08 -0700 (Thu, 01:14 IST)

>         We triggered the failure during some internal experiment with
>         ftruncate/mmap/write/read sequence. And we found that some pages are
>         "lost" after writing to the mmaped file. which in the following test
>         cases (count >= 0).
>         
>         First we deployed the test cases into group of machines and see about
>         >20% failure rate on average. Then, I did couple of experiment to try
>         to reproduce it on a single machine. what i found is that:
>         1. add a fsync after write the file, i can not reproduce this issue.
>         2. add memory pressure(mmap/mlock) while run the test in infinite
>         loop, the failure is reproduced quickly. ( background flushing ? )
>         
>         The "bad pages" count differs each time from one digit to 4,5 digit
>         for 128M ftruncated file. and what i also found that the bad page
>         number are contiguous for each segment which total bad pages container
>         several segments. ext "1-4, 9-20, 48-50" (  batch flushing ? )
>         
>         (The failure is reproduced based on 2.6.29-rc8, also happened on
>         2.6.18 kernel. . Here is the simple test case to reproduce it with
>         memory pressure. )

I would like to add this test to LTP (http://ltp.sourceforge.net/) with the
following patch, subject to your permission. If you have no objection, could
you please reply to this mail with your Signed-off-by line?

I have made a couple of changes to your test program so that it can be run
for a specified number of hours, minutes and seconds.

I would also like to request that you kindly let us know about any test
programs you develop for the Linux kernel in future by adding a:

Cc: Subrata Modak <[email protected]>,
Cc: ltp-list <[email protected]>,

Ported-To-And-Tested-On-LTP-By: Subrata Modak <[email protected]>,
---

--- ltp-full-20090331.orig/runtest/stress.part1 2009-04-01 18:23:55.000000000 +0530
+++ ltp-full-20090331/runtest/stress.part1      2009-04-01 19:44:34.000000000 +0530
@@ -18,6 +18,12 @@ mtest06 mmap1 -x 0.05
 
 mem02 mem02
 
+# Test for mmap() page corruption. This test is meant to run for
+# 1 hour or more. Please change the -h (hours), -m (minutes) &
+# -s (seconds) settings if the defaults are not desired.
+mmap-corruption01 mmap-corruption01 -h1 -m1 -s1
+
+
 page01 page01
 page02 page02
 
--- ltp-full-20090331.orig/testcases/kernel/mem/mmapstress/mmap-corruption01.c  1970-01-01 05:30:00.000000000 +0530
+++ ltp-full-20090331/testcases/kernel/mem/mmapstress/mmap-corruption01.c       2009-04-01 19:38:20.000000000 +0530
@@ -0,0 +1,185 @@
+/******************************************************************************/
+/*                                                                            */
+/* Copyright (c) Ying Han <[email protected]>, 2009                 */
+/*                                                                            */
+/* This program is free software;  you can redistribute it and/or modify      */
+/* it under the terms of the GNU General Public License as published by       */
+/* the Free Software Foundation; either version 2 of the License, or          */
+/* (at your option) any later version.                                        */
+/*                                                                            */
+/* This program is distributed in the hope that it will be useful,            */
+/* but WITHOUT ANY WARRANTY;  without even the implied warranty of            */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See                  */
+/* the GNU General Public License for more details.                           */
+/*                                                                            */
+/* You should have received a copy of the GNU General Public License          */
+/* along with this program;  if not, write to the Free Software               */
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA    */
+/*                                                                            */
+/******************************************************************************/
+/*
+ ftruncate-mmap: pages are lost after writing to mmaped file,
+
+ We triggered the failure during an internal experiment with an
+ ftruncate/mmap/write/read sequence, and found that some pages are
+ "lost" after writing to the mmaped file. The test case below reports
+ this as a non-zero bad-page count (count > 0).
+        
+  First we deployed the test case onto a group of machines and saw a
+  >20% failure rate on average. Then I did a couple of experiments to try
+  to reproduce it on a single machine. What I found is that:
+  1. With an fsync added after writing the file, I cannot reproduce this issue.
+  2. With memory pressure (mmap/mlock) added while running the test in an
+  infinite loop, the failure is reproduced quickly. ( background flushing ? )
+  
+  The "bad pages" count differs each time, from one digit to 4-5 digits,
+  for a 128M ftruncated file. I also found that the bad page numbers are
+  contiguous within each segment, and that the bad pages in total contain
+  several segments, e.g. "1-4, 9-20, 48-50" (  batch flushing ? )
+  
+  (The failure was reproduced on 2.6.29-rc8 and also happens on the
+   2.6.18 kernel. Here is a simple test case to reproduce it with
+   memory pressure. )
+*/
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+/* Harness Specific Include Files. */
+#include "test.h"
+#include "usctest.h"
+
+/* Extern Global Variables */
+extern int  Tst_count;               /* counter for tst_xxx routines.         
*/
+extern char *TESTDIR;                /* temporary dir created by tst_tmpdir() 
*/
+
+/* Global Variables */
+char *TCID     = "mmap-corruption01"; /* test program identifier.          */
+int  TST_TOTAL = 1;                  /* total number of tests in this file.   
*/
+
+
+long kMemSize  = 128 << 20;
+int kPageSize = 4096;
+
+char *usage="-h hours -m minutes -s secs\n";
+
+int anyfail()
+{
+  tst_resm(TFAIL, "Test failed\n");
+  tst_rmdir();
+  tst_exit();
+}
+
+int main(int argc, char **argv) {
+       char *progname;
+       int status;
+       int count = 0;
+       int i, c;
+       char *fname = "test.mmap-corruption";
+       char *mem;
+       unsigned long alarmtime = 0;
+       struct sigaction sa;
+       void finish(int sig);
+
+       progname = *argv;
+       while ((c = getopt(argc, argv, ":h:m:s:")) != -1) {
+                switch (c) {
+                case 'h':
+                         alarmtime += atoi(optarg) * 60 * 60;
+                         break;
+                case 'm':
+                         alarmtime += atoi(optarg) * 60;
+                         break;
+                case 's':
+                         alarmtime += atoi(optarg);
+                         break;
+                default:
+                        (void)fprintf(stderr, "usage: %s %s\n", progname,
+                                usage);
+                        anyfail();
+                }
+       } 
+
+        /*
+         *  Plan for death by signal.  User may have specified
+         *  a time limit, in which case set an alarm and catch SIGALRM.
+         *  Also catch and cleanup with SIGINT, SIGQUIT, and SIGTERM.
+         */
+        sa.sa_handler = finish;
+        sa.sa_flags = 0;
+        if (sigemptyset(&sa.sa_mask)) {
+                perror("sigempty error");
+                exit(1);
+        }
+
+        if (sigaction(SIGINT, &sa, 0) == -1) {
+                perror("sigaction error SIGINT");
+                exit(1);
+        }
+        if (alarmtime) {
+                if (sigaction(SIGALRM, &sa, 0) == -1) {
+                        perror("sigaction error");
+                        exit(1);                        
+                }
+                (void)alarm(alarmtime);
+                printf("mmap-corruption will run for=> %ld, 
seconds\n",alarmtime);    
+        } else { //Run for 5 secs only
+                if (sigaction(SIGALRM, &sa, 0) == -1) {
+                        perror("sigaction error");
+                        exit(1); 
+                }
+                (void)alarm(5);
+                printf("mmap-corruption will run for=> 5, seconds\n");    
+        }
+        /* If we get a SIGQUIT or SIGTERM, clean up and exit immediately. */
+        sa.sa_handler = finish;
+        if (sigaction(SIGQUIT, &sa, 0) == -1) {
+                perror("sigaction error SIGQUIT");
+                exit(1);                        
+        }
+        if (sigaction(SIGTERM, &sa, 0) == -1) {
+                perror("sigaction error SIGTERM");
+                exit(1);                        
+        }
+
+
+       tst_tmpdir();
+       while (1) {
+             unlink(fname);
+             int fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0600);
+             status = ftruncate(fd, kMemSize);
+
+             mem = mmap(0, kMemSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 
0);
+             // Fill the memory with 1s.
+             memset(mem, 1, kMemSize);
+
+             for (i = 0; i < kMemSize; i++) {
+                  int byte_good = mem[i] != 0;
+                  if (!byte_good && ((i % kPageSize) == 0)) {
+                       //printf("%d ", i / kPageSize);
+                       count++;
+                  }
+             }
+             munmap(mem, kMemSize);
+             close(fd);
+             unlink(fname);
+             if (count > 0) {
+                 printf("Running %d bad page\n", count);
+                 return 1;
+             }
+             count=0;
+       }
+       return 0;
+}
+
+void finish(int sig) {
+     printf("mmap-corruption PASSED\n");
+     exit(0);
+}
+

---
Regards--
Subrata


------------------------------------------------------------------------------
_______________________________________________
Ltp-list mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/ltp-list

Reply via email to