Cache consistency with mmap'ed writes can apparently only be achieved when
no afs_linux_writepage_sync calls are aborted at all.
I think I still haven't truly understood the exact idea of the antirecursion
patch. To me, it seems to omit writing some precious data.
We had a backtrace which showed we were calling back into ourselves.
The sole goal was to prevent that. Anything beyond that was incidental and
unneeded.
There is yet another fix proposal in RT #124627. It works for me at least.
Evidently, after applying fixes, deadlocks could still occur during the
first invocation of osi_VM_StoreAllSegments, so
linux-mmap-antirecursion-20081020 never really worked, I'm afraid.
This current fix hopefully will.
One thing had me puzzled for a while - the mmap_test program (attached)
works fine on a 2.6.18-128.1.6.el5 kernel, but reliably produces corrupt
chunks in a virtual machine with the 2.6.18-128.1.6.el5xen kernel,
both using 100MB disk caches. This holds true for both vanilla 1.4.10 and
the various fixes I've been trying over the last few days.
Can anyone reproduce?
Cheers
- Felix
#include <sys/mman.h>
#include <sys/fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
/* Requested file size in bytes: 750 << 20, i.e. 750 MiB. */
#define TARGETSIZE (750 << 20)
/* Size in bytes of one test chunk; fillmem() fills a chunk with one byte value. */
#define CHUNKSIZE (1460)
/* Number of whole chunks that fit in TARGETSIZE (integer division rounds down). */
#define NRCHUNKS (TARGETSIZE / CHUNKSIZE)
/* Bytes actually used for the file and the mapping: a whole number of chunks. */
#define SIZE (NRCHUNKS * CHUNKSIZE)
/*
 * Fill one chunk with a marker byte.
 *
 * start: first byte of the chunk; CHUNKSIZE bytes are written from here.
 * nr:    chunk number; memset() stores it converted to unsigned char, so
 *        only its low 8 bits end up in memory.
 */
static void fillmem(void *start, int nr)
{
    const size_t len = CHUNKSIZE;
    unsigned char *dst = start;

    memset(dst, nr, len);
}
/*
 * Low 12 bits of the address (buf + off), i.e. the offset inside a
 * 4 KiB-aligned region. Not referenced by the code visible in this file.
 */
#define page_offset(buf, off) (0xfff & ((unsigned)(unsigned long)(buf)+(off)))
/* Non-zero: main() writes the file before checking it; cleared by a "-r" argument. */
static int do_write = 1;
/* Chunk number at which the current run of corrupt chunks began, or -1 when not inside a run. */
static int first_corrupt = -1;
/* Running count of chunks that checkmem() classified as corrupt. */
static int sum_corrupt = 0;
/* Chunk numbers in the order main() writes them: filled with 0..NRCHUNKS-1, then shuffled. */
static int chunkorder[NRCHUNKS];
/*
 * Look up the position at which chunk `nr` appears in chunkorder[].
 *
 * Returns the index into chunkorder[] on success, -1 when `nr` is not a
 * valid chunk number, and -2 when no entry of chunkorder[] equals `nr`.
 * The lookup is a linear scan over all NRCHUNKS entries.
 */
static int order(int nr)
{
    int pos = 0;

    /* Reject chunk numbers outside 0..NRCHUNKS-1 up front. */
    if (!(nr >= 0 && nr < NRCHUNKS))
        return -1;

    while (pos < NRCHUNKS) {
        if (chunkorder[pos] == nr)
            return pos;
        pos++;
    }

    return -2;
}
/*
 * Verify that one chunk still holds the marker byte written by fillmem().
 *
 * buf: first byte of the chunk; CHUNKSIZE bytes are read from here.
 * nr:  chunk number; the expected byte is nr converted to unsigned char.
 *
 * Side effects: increments sum_corrupt when at least one byte differs and
 * maintains first_corrupt as the chunk number that opened the current run
 * of consecutive corrupt chunks (-1 while not inside a run).
 *
 * A chunk counts as corrupt as soon as a single byte is wrong. (Comparing
 * the first and last mismatch offsets with '<' would miss a chunk that has
 * exactly one bad byte, because both offsets are then equal.)
 */
static void checkmem(void *buf, int nr)
{
    const unsigned char expected = (unsigned char)nr;
    const unsigned char *p = buf;
    unsigned int mismatches = 0;
    int i;

    /* Scan the whole chunk (no early exit) so every byte of it is read. */
    for (i = 0; i < CHUNKSIZE; i++) {
        if (p[i] != expected)
            mismatches++;
    }

    if (mismatches != 0) {
        sum_corrupt++;
        if (first_corrupt == -1)
            first_corrupt = nr;
    } else if (first_corrupt != -1) {
        /*
         * A run of corrupt chunks, first_corrupt .. nr-1, ends here.
         * Per-run reporting is intentionally disabled; only the totals
         * accumulated in sum_corrupt are reported by the caller.
         */
        first_corrupt = -1;
    }
}
/*
 * (Re)establish the shared read/write mapping of the first SIZE bytes of fd.
 *
 * fd:      descriptor of the file to map.
 * mapping: an existing mapping of SIZE bytes to tear down first, or NULL
 *          when there is none. After unmapping, the kernel is advised via
 *          POSIX_FADV_DONTNEED that the file's cached data is not needed.
 *
 * Returns whatever mmap() returns; the return values of munmap() and
 * posix_fadvise() are not examined.
 */
static char *remap(int fd, char *mapping)
{
    void *fresh;

    if (mapping != NULL) {
        munmap(mapping, SIZE);
        posix_fadvise(fd, 0, SIZE, POSIX_FADV_DONTNEED);
    }

    fresh = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    return fresh;
}
/*
 * mmap write / read-back consistency test on the file "mapfile" in the
 * current directory.
 *
 * Default mode: create/truncate the file, size it to SIZE bytes, map it
 * shared, fill every chunk (in random order) with its marker byte, then
 * unmap, remap and verify every chunk.
 * With a first argument starting with "-r": skip the write phase and only
 * verify an existing file.
 *
 * Returns 0 once the check has run (regardless of how many chunks are
 * corrupt) and -1 when the file cannot be opened, sized or mapped.
 */
int main(int argc, char **argv)
{
    char *mapping = NULL;       /* NULL tells remap() there is nothing to unmap */
    int open_flags = O_RDWR;    /* PROT_WRITE + MAP_SHARED needs a writable fd */
    int rc = -1;
    int fd, i;

    /* SIZE has type int; cast so the arguments match %lu and %u. */
    printf("target size: %lu (%u MB)\n",
           (unsigned long)SIZE, (unsigned)(SIZE >> 20));

    if (argc > 1 && strncmp(argv[1], "-r", 2) == 0) {
        do_write = 0;
        printf("trying read test only\n");
    }

    /*
     * Make some random ordering of writing the chunks to the
     * memory map..
     *
     * Start with fully ordered..
     */
    for (i = 0; i < NRCHUNKS; i++)
        chunkorder[i] = i;

    /* ..and then mix it up randomly */
    srandom(time(NULL));
    for (i = 0; i < NRCHUNKS; i++) {
        int index = (unsigned int) random() % NRCHUNKS;
        int nr = chunkorder[index];

        chunkorder[index] = chunkorder[i];
        chunkorder[i] = nr;
    }

    /*
     * Only the write test may create or truncate the file; truncating in
     * read-only mode would destroy the very data that is to be checked.
     */
    if (do_write)
        open_flags |= O_TRUNC | O_CREAT;

    fd = open("mapfile", open_flags, 0666);
    if (fd < 0) {
        perror("open mapfile");
        return -1;
    }

    if (do_write) {
        if (ftruncate(fd, SIZE) < 0) {
            perror("ftruncate");
            goto out_close;
        }

        mapping = remap(fd, NULL);
        if (mapping == MAP_FAILED) {
            perror("mmap");
            goto out_close;
        }

        for (i = 0; i < NRCHUNKS; i++) {
            int chunk = chunkorder[i];

            printf("Writing chunk %d/%d (%d%%) \r", i, NRCHUNKS,
                   100 * i / NRCHUNKS);
            fillmem(mapping + chunk * CHUNKSIZE, chunk);
        }
        printf("\n");
    } else {
        /* Reading mapped pages beyond end-of-file raises SIGBUS, so check first. */
        off_t len = lseek(fd, 0, SEEK_END);

        if (len < 0) {
            perror("lseek");
            goto out_close;
        }
        if (len < (off_t)SIZE) {
            fprintf(stderr,
                    "mapfile is shorter than %lu bytes; run the write test first\n",
                    (unsigned long)SIZE);
            goto out_close;
        }
    }

    /* Unmap, drop, and remap.. */
    mapping = remap(fd, mapping);
    if (mapping == MAP_FAILED) {
        perror("mmap");
        goto out_close;
    }

    /* .. and check */
    for (i = 0; i < NRCHUNKS; i++) {
        int chunk = i;

        printf("Checking chunk %d/%d (%d%%) \r", i, NRCHUNKS,
               100 * i / NRCHUNKS);
        checkmem(mapping + chunk * CHUNKSIZE, chunk);
    }

    printf("%i corrupted blocks (i.e. %i%%)\n",
           sum_corrupt, 100 * sum_corrupt / NRCHUNKS);
    printf("%i OK blocks\n", NRCHUNKS - sum_corrupt);

    munmap(mapping, SIZE);
    rc = 0;

out_close:
    close(fd);
    return rc;
}