Hi Peng,

PVFS2 uses Asynchronous I/O to perform reads and writes of the actual data. The call to look for is lio_listio in dbpf-bstream.c, but tracing that may not be what you want since it just posts the I/O operation. Service of the actual I/O is done in a separate AIO thread at which point the aio_progress_notification function is called to specify completion. If you just want to trace I/O calls, there's already an event tracing system in place, although it may not give you precisely what you want. The way to turn it on is with the pvfs2-set-eventmask util in src/apps/admin. I think for trove you can do:

pvfs2-set-eventmask -m <mountpoint>  -a 0x4 -o 0xFFFF

That should turn on the trove api mask at least. You can find the mask definitions in include/pvfs2-event.h.

-sam

On Jan 13, 2006, at 11:02 PM, Peng Gu wrote:

I was trying to collect PVFS2 file data I/O traces. Since trove is the
I/O module for pvfs2, and DBPF is current the only implementation of
trove (I hope the document I read is up-to-date). So I try to keep
records of the system calls in DBPF as my file I/O trace. ( I am not
worrying about the metadata right now so I did not care about the
Berkeley DB).

So I modified the dbpf.h in "src/io/trove/trove-dbpf/". The content of
dbpf.h.patch when I type (dbpf.h.bak is the original version in
pvfs2-1.3.2 release, while dbpf.h is my modified version) "diff
dbpf.h.bak dbpf.h > dbpf.h.patch", is included at the bottom of this
mail. (I did not modify any files other than dbpf.h)

The problem of my approach is that, the _read_ and _write_ operation
did not get recorded, while the open, close, unlink, fstat, ftruncate
operations did get recorded.  A small piece of my trace looks like:

1137167057.187977 1137167057.188007 9999 OPEN
//pvfs2-storage-space/5880a642/bstreams/00000059/fffffffb.bstream = -1
1137167057.188020 1137167057.188100 9999 OPEN
//pvfs2-storage-space/5880a642/bstreams/00000059/fffffffb.bstream = 20
= 20
1137167116.054536 1137167116.054555 9999 FSTAT 20 = 0
1137167124.430523 1137167124.430544 9999 CLOSE 18 = 0
1137167124.430663 1137167124.688784 9999 UNLINK
//pvfs2-storage-space/5880a642/bstreams/00000000/100000000.bstream = 0
1137167124.692452 1137167124.692474 9999 UNLINK
//pvfs2-storage-space/5880a642/bstreams/00000062/000ffffe.bstream = -1

My question is: am I missing anything so that _read_ and _write_
operations did not get recorded, or the file data access in dbpf was
handled by other system calls than read and write? Further more, what
should I do to solve this problem?

And here is the content of dbpf.h.patch:

418,426c418,544
< #define DBPF_OPEN   open
< #define DBPF_WRITE  write
< #define DBPF_LSEEK  lseek
< #define DBPF_READ   read
< #define DBPF_CLOSE  close
< #define DBPF_UNLINK unlink
< #define DBPF_SYNC   fsync
< #define DBPF_RESIZE ftruncate
< #define DBPF_FSTAT  fstat
---
#define DBPF_OPEN   lsfs_open   //Yes
#define DBPF_WRITE  lsfs_write
#define DBPF_READ   lsfs_read
#define DBPF_CLOSE  lsfs_close  //Yes
#define DBPF_LSEEK  lsfs_lseek
#define DBPF_UNLINK lsfs_unlink //Yes
#define DBPF_SYNC   lsfs_fsync
#define DBPF_RESIZE lsfs_ftruncate  //Yes
#define DBPF_FSTAT  lsfs_fstat  //Yes

/*
 * Prepare the trace for LSFS (refer to Dong' work), in the format of
 * "time  | pid | opcode |parameter |return value"
 * Since time and pid are common, they are provided in here.
 * Other fields should be provided by the corresponding functions,
 * such as DBPF_OPEN, DBPF_WRITE, ...
 */
#define lsfs_print_system_time(output) \
    do { \
        struct timeval tv; \
        gettimeofday(&tv, 0); \
        fprintf(output, "%010ld.%06ld ", tv.tv_sec, tv.tv_usec); \
    } while (0)

#define lsfs_print_current_pid(output) \
    do { \
        pid_t pid = 9999; \
        fprintf(output, "%d ", pid); \
    } while (0)

#define lsfs_print_trace(output, fmt, args...) \
    do { \
        fprintf(output, fmt, ##args); \
        fflush(output); \
    } while (0)

// ssize_t = int, size_t = unsigned int, off_t = long, mode_t = unsigned short static inline int lsfs_open(const char *pathname, int flags, mode_t mode)
{
    int ret;
    lsfs_print_system_time(stderr);
    ret=open(pathname, flags, mode);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
    lsfs_print_trace(stderr, "%s %s = %d\n", "OPEN", pathname, ret);
    return ret;
}
static inline ssize_t lsfs_write(int fd, const void *buf, size_t count)
{
    ssize_t ret;
    lsfs_print_system_time(stderr);
    ret=write(fd, buf, count);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
    lsfs_print_trace(stderr, "%s %d = %d\n", "WRITE", fd, ret);
    return ret;
}
static inline ssize_t lsfs_read(int fd, void *buf, size_t count)
{
    ssize_t ret;
    lsfs_print_system_time(stderr);
    ret=read(fd, buf, count);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
    lsfs_print_trace(stderr, "%s %d = %d\n", "READ", fd, ret);
    return ret;
}
static inline int lsfs_close(int fd)
{
    int ret;
    lsfs_print_system_time(stderr);
    ret=close(fd);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
    lsfs_print_trace(stderr, "%s %d = %d\n", "CLOSE", fd, ret);
    return ret;
}
static inline off_t lsfs_lseek(int fd, off_t offset, int whence)
{
    off_t ret;
    lsfs_print_system_time(stderr);
    ret=lseek(fd, offset, whence);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
    lsfs_print_trace(stderr, "%s %d = %lld\n", "LSEEK", fd, ret);
    return ret;
}
static inline int lsfs_unlink(const char *pathname)
{
    int ret;
    lsfs_print_system_time(stderr);
    ret=unlink(pathname);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
lsfs_print_trace(stderr, "%s %s = %d\n", "UNLINK", pathname, ret);
    return ret;
}
static inline int lsfs_fsync(int fd)
{
    int ret;
    lsfs_print_system_time(stderr);
    ret=fsync(fd);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
    lsfs_print_trace(stderr, "%s %d = %d\n", "FSYNC", fd, ret);
    return ret;
}
static inline int lsfs_ftruncate(int fd, off_t length)
{
    int ret;
    lsfs_print_system_time(stderr);
    ret=ftruncate(fd,length);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
    lsfs_print_trace(stderr, "%s %d = %d\n", "FTRUNCATE", fd, ret);
    return ret;
}
static inline int lsfs_fstat(int fd, struct stat *buf)
{
    int ret;
    lsfs_print_system_time(stderr);
    ret=fstat(fd, buf);
    lsfs_print_system_time(stderr);
    lsfs_print_current_pid(stderr);
    lsfs_print_trace(stderr, "%s %d = %d\n", "FSTAT", fd, ret);
    return ret;
}

Sincerely
Peng

_______________________________________________
PVFS2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers


_______________________________________________
PVFS2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to