I'm opening an MMC block device node with O_DIRECT on an MSM7k series
processor with Android (sort of a stress test, and I want to be sure
reads and writes go to the device and not just to the block cache). I'm
seeing that the data read back doesn't match the data written. There
are known problems with O_DIRECT on ARM with aliasing VIPT caches,
discussed at length on this thread:
http://lkml.indiana.edu/hypermail/linux/kernel/0811.2/01044.html
Based on that discussion, writes should be okay, but there may be
alaiasing with PIO reads if the driver doesn't call flush_dcache_page()
after the transfer. However, I am seeing problems on writes, and I
don't see this issue at all on another ARMv6TEJ-based board I have
running 2.6.31-rc4 (with glibc-based userspace, not Android) and having
an aliasing VIPT cache. The data errors I see look like they could
certainly be from cache aliasing though -- the incorrect data always
comes in 32-byte chunks aligned to a 32-byte boundary, and the data is
always the same as what should have been written at the same offset in
the previous data transfer. So I'm suspecting aliasing at this point,
but I'm still considering that it could be something else.
I'm attaching source for a read/write test that produces this behavior.
All it does is iterate over the entire device, writing a 64KB,
locally-unique data pattern using O_DIRECT and reading the data back into
a seperate buffer for verification. I'm seeing failed verification
almost every time I run the test with a 2 GB SD card.
I'm in a difficult debugging sitation for this issue. I can't build the
kernel for the device (I'm working on getting the source to allow me to
do this), and I don't have documentation for the processor. I also
don't know whether the msm_sdcc driver is using DMA or PIO for the
transfer, although in my tracing of the code paths it doesn't look like
it should matter because in either case the cache lines for the
userspace mapping should be cleaned and invalidated before the transfer
starts. Are there any peculiarities of Android kernel modifications or
of the MSM7k that could be causing this sort of behavior?
Thanks,
Seth
--~--~---------~--~----~------------~-------~--~----~
unsubscribe: [email protected]
website: http://groups.google.com/group/android-kernel
-~----------~----~----~----~------~----~------~--~---
/* For large-file support */
#define _FILE_OFFSET_BITS 64
#define _LARGEFILE_SOURCE
#define _LARGEFILE64_SOURCE
/* For O_DIRECT */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/fs.h>
#define NUM_TEST_BLKS 128
/*
* Allocate page-aligned memory. Could use posix_memalign(3), but some
* systems don't support it. Also pre-faults memory since we'll be using
* it all right away anyway.
*/
static void *pagealign_alloc(size_t size)
{
void *ret = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | MAP_LOCKED,
-1, 0);
if (ret == MAP_FAILED) {
perror("mmap");
ret = NULL;
}
return ret;
}
static void pagealign_free(void *addr, size_t size)
{
int ret = munmap(addr, size);
if (ret == -1)
perror("munmap");
}
static ssize_t do_read(int fd, void *buf, off64_t start, size_t count)
{
ssize_t ret;
size_t bytes_read = 0;
lseek64(fd, start, SEEK_SET);
do {
ret = read(fd, buf + bytes_read, count - bytes_read);
if (ret == -1) {
perror("read");
return -1;
} else if (ret == 0) {
fprintf(stderr, "Unexpected end-of-file\n");
return -1;
}
bytes_read += ret;
} while (bytes_read < count);
return bytes_read;
}
static ssize_t do_write(int fd, const void *buf, off64_t start, size_t count)
{
ssize_t ret;
size_t bytes_out = 0;
lseek64(fd, start, SEEK_SET);
do {
ret = write(fd, buf + bytes_out, count - bytes_out);
if (ret == -1) {
perror("write");
return -1;
} else if (ret == 0) {
fprintf(stderr, "write returned 0\n");
return -1;
}
bytes_out += ret;
} while (bytes_out < count);
return bytes_out;
}
/*
* Initializes test buffer with locally-unique test pattern. High 16-bits of
* each 32-bit word contain first disk block number of the test area, low
* 16-bits contain word offset into test area. The goal is that a given test
* area should never contain the same data as a nearby test area, and that the
* data for a given test area be easily reproducable given the start block and
* test area size.
*/
static void init_test_buf(void *buf, uint64_t start_blk, size_t len)
{
uint32_t *data = buf;
size_t i;
len /= sizeof(uint32_t);
for (i = 0; i < len; i++)
data[i] = (start_blk & 0xFFFF) << 16 | (i & 0xFFFF);
}
static void dump_hex(const void *buf, int len)
{
const uint8_t *data = buf;
int i;
char ascii_buf[17];
ascii_buf[16] = '\0';
for (i = 0; i < len; i++) {
int val = data[i];
int off = i % 16;
if (off == 0)
printf("%08x ", i);
printf("%02x ", val);
ascii_buf[off] = isprint(val) ? val : '.';
if (off == 15)
printf(" %-16s\n", ascii_buf);
}
i %= 16;
if (i) {
ascii_buf[i] = '\0';
while (i++ < 16)
printf(" ");
printf(" %-16s\n", ascii_buf);
}
}
static void update_progress(int current, int total)
{
double pct_done = (double)current * 100 / total;
printf("Testing area %d/%d (%6.2f%% complete)\r", current, total,
pct_done);
fflush(stdout);
}
int main(int argc, const char *argv[])
{
int ret = 1;
const char *path;
int fd;
struct stat stat;
void *read_buf = NULL, *write_buf = NULL;
int blk_size;
uint64_t num_blks;
size_t test_size;
int test_areas, i;
if (argc != 2) {
printf("Usage: alias_test blkdev_path\n");
exit(1);
}
path = argv[1];
fd = open(path, O_RDWR | O_DIRECT | O_LARGEFILE);
if (fd == -1) {
perror("open");
exit(1);
}
if (fstat(fd, &stat) == -1) {
perror("stat");
goto cleanup;
} else if (!S_ISBLK(stat.st_mode)) {
fprintf(stderr, "%s is not a block device\n", path);
goto cleanup;
}
if (ioctl(fd, BLKSSZGET, &blk_size) == -1) {
perror("ioctl");
goto cleanup;
}
if (ioctl(fd, BLKGETSIZE64, &num_blks) == -1) {
perror("ioctl");
goto cleanup;
}
num_blks /= blk_size;
test_size = (size_t)blk_size * NUM_TEST_BLKS;
read_buf = pagealign_alloc(test_size);
write_buf = pagealign_alloc(test_size);
if (!read_buf || !write_buf) {
fprintf(stderr, "Error allocating test buffers\n");
goto cleanup;
}
/*
* Start the actual test. Go through the entire device, writing
* locally-unique patern to each test block and then reading it
* back.
*/
if (num_blks / NUM_TEST_BLKS > INT_MAX) {
printf("Warning: Device too large for test variables\n");
printf("Entire device will not be tested\n");
test_areas = INT_MAX;
} else {
test_areas = num_blks / NUM_TEST_BLKS;
}
printf("Starting test\n");
for (i = 0; i < test_areas; i++) {
uint64_t cur_blk = (uint64_t)i * NUM_TEST_BLKS;
update_progress(i + 1, test_areas);
init_test_buf(write_buf, cur_blk, test_size);
if (do_write(fd, write_buf, cur_blk * blk_size, test_size) !=
(ssize_t)test_size) {
fprintf(stderr, "write failed, aborting test\n");
goto cleanup;
}
if (do_read(fd, read_buf, cur_blk * blk_size, test_size) !=
(ssize_t)test_size) {
fprintf(stderr, "read failed, aborting test\n");
goto cleanup;
}
if (memcmp(write_buf, read_buf, test_size)) {
printf("Readback verification failed at block %llu\n\n",
cur_blk);
printf("Written data:\n");
dump_hex(write_buf, test_size);
printf("\nRead data:\n");
dump_hex(read_buf, test_size);
goto cleanup;
}
}
printf("\nTest complete\n");
ret = 0;
cleanup:
if (read_buf)
pagealign_free(read_buf, test_size);
if (write_buf)
pagealign_free(write_buf, test_size);
close(fd);
return ret;
}