I am a bit late getting back to you on this but I do have a few comments The safe_read seems out of tune currently. Or maybe I am just totally confused :)
> This patch adds support for merging notifications on the ubd > notification file descriptor. Multiple transactions are processed > at a time resulting in 10-15% virtual disk speed improvement. > > The mechanics are rather primitive - no ring buffers, primitive > guaranteed flush and guaranteed to-record-size read. > > Signed-off-by: Anton Ivanov <aiva...@brocade.com> > --- > arch/um/drivers/ubd.h | 2 + > arch/um/drivers/ubd_kern.c | 156 ++++++++++++++++++++++++++++++++++++-------- > arch/um/drivers/ubd_user.c | 19 +++++- > arch/um/include/shared/os.h | 1 + > arch/um/os-Linux/file.c | 12 ++++ > 5 files changed, 161 insertions(+), 29 deletions(-) > > diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h > index 3b48cd2..2c5e6fd 100644 > --- a/arch/um/drivers/ubd.h > +++ b/arch/um/drivers/ubd.h > @@ -10,6 +10,8 @@ > extern int start_io_thread(unsigned long sp, int *fds_out); > extern int io_thread(void *arg); > extern int kernel_fd; > +extern void setup_ubd_pollfd(void * fds, int fd); > +extern int size_of_pollfd(void); > > #endif > > diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c > index 39ba207..9f30c50 100644 > --- a/arch/um/drivers/ubd_kern.c > +++ b/arch/um/drivers/ubd_kern.c > @@ -58,6 +58,12 @@ struct io_thread_req { > int error; > }; > > +static void * kernel_pfd = NULL; These are pointer to pointers? so this will be an array of 64 pointers kmalloced? > +struct io_thread_req **kernel_reqs; > + > +struct io_thread_req **helper_reqs; > + > static inline int ubd_test_bit(__u64 bit, unsigned char *data) > { > __u64 n; > @@ -442,6 +448,32 @@ static void do_ubd_request(struct request_queue * q); > static int thread_fd = -1; > static LIST_HEAD(restart); > This would be clearer if it was struct io_thread_req **buffer Ok this is reading MAX_SG pointers rather than the previous single pointer > +static int do_safe_read(int fd, void * buffer, int max_size, int > record_size){ > + unsigned char * buf; > + int n, size; > + > + size = 0; > + buf = (unsigned char *) buffer; > + > + return os_read_file(fd, buf + size, max_size - size); Your function ends here every thing else is dead code? > + > + do { > + n = os_read_file(fd, buf, max_size - size); Do you want to return on only no data or a set of complete pointers? e.g. size % sizeof(void *) == 0 maybe? then return size since we may have several already. // if we have not read anything yet we can return safely > + if ((size == 0) && (n == -EAGAIN)) > + return n; // update how much we have read so far > + if (n> 0) > + size = size + n; A bit of shuffeling here might make it clearer and // discard whatever we have read in so far if we were not /going to block/ but errored > + if ((n < 0) && (n != -EAGAIN)) > + return n; max_size should be a multiple of record size so the loop would exit anyway? It might be better to fold it into the while clause below. // when we have read the amount requested stop > + if (size == max_size) > + break; // stopping if we hit exactly record_size? so N full pointers have been read. > + } while ((size % record_size) != 0); // return how much we actually read > + return size; > +} > + > + > /* XXX - move this inside ubd_intr. */ > /* Called without dev->lock held, and only in interrupt context. */ > static void ubd_handler(void) > @@ -450,21 +482,37 @@ static void ubd_handler(void) > struct ubd *ubd; > struct list_head *list, *next_ele; > unsigned long flags; > - int n; > + int n, rcount, i; > > while(1){ > - n = os_read_file(thread_fd, &req, > - sizeof(struct io_thread_req *)); > - if(n != sizeof(req)){ > - if(n == -EAGAIN) > - break; > - printk(KERN_ERR "spurious interrupt in ubd_handler, " > - "err = %d\n", -n); > - return; > + n = do_safe_read(thread_fd, helper_reqs, > + sizeof(struct io_thread_req *) * MAX_SG, > + sizeof(struct io_thread_req *) > + ); // no data pending so drop out of the loop > + if(n == -EAGAIN){ > + break; > + } > + if(n < 0){ > + printk("io_thread - read failed, fd = %d, " > + "err = %d\n", thread_fd, -n); > + } // if we have a partial pointer complain > + if(n % sizeof(struct io_thread_req *) != 0){ > + printk("kernel_ubd_io - invalid read, fd = %d, " > + "read = %d\n", thread_fd, n); > + continue; > } > > - blk_end_request(req->req, 0, req->length); > - kfree(req); > + rcount = n / sizeof(struct io_thread_req *); // step through each io_thread_req * in order > + for (i = 0; i < rcount; i++) { Perhaps *helper_reqs[i] might be clearer dereferencing pointers to pointers is often hard to understand. > + req = * (helper_reqs + i); > + blk_end_request(req->req, 0, req->length); > + kfree(req); > + > + } > } > reactivate_fd(thread_fd, UBD_IRQ); > > @@ -1080,6 +1128,26 @@ late_initcall(ubd_init); > static int __init ubd_driver_init(void){ > unsigned long stack; > int err; > + > + kernel_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL); > + if (kernel_reqs == NULL) { > + printk("Failed to allocate memory for req buffer\n"); > + return 0; > + } > + > + > + kernel_pfd = kmalloc(size_of_pollfd(), GFP_KERNEL); > + if (kernel_pfd == NULL) { > + printk("Failed to allocate memory for pollfd\n"); > + return 0; > + } > + > + helper_reqs = kmalloc(MAX_SG * sizeof(struct io_thread_req *), GFP_KERNEL); > + if (helper_reqs == NULL) { Failed to allocate memory for array of pointers to req buffers > + printk("Failed to allocate memory for req buffer\n"); > + return 0; > + } > + > > /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ > if(global_openflags.s){ > @@ -1458,30 +1526,62 @@ static int io_count = 0; > int io_thread(void *arg) > { > struct io_thread_req *req; > - int n; > + unsigned char * buffer; > + int n, rcount, i; > > os_fix_helper_signals(); > > + printk("Starting UBD helper thread\n"); > + > while(1){ > - n = os_read_file(kernel_fd, &req, > - sizeof(struct io_thread_req *)); > - if(n != sizeof(struct io_thread_req *)){ > - if(n < 0) The old code was missing the test for EAGAIN it was not too likely to fail a 4 byte read or 8 byte on 64bit but still it should have had it just in case. > - printk("io_thread - read failed, fd = %d, " > - "err = %d\n", kernel_fd, -n); > - else { > - printk("io_thread - short read, fd = %d, " > - "length = %d\n", kernel_fd, n); > - } Ok, but why do we want to sit in poll here rather than read? we are nonblocking on purpose right? why build a blocking non-blocking read? > + setup_ubd_pollfd(kernel_pfd, kernel_fd); > + os_poll(kernel_pfd, 1, -1); > + n = do_safe_read(kernel_fd, kernel_reqs, > + sizeof(struct io_thread_req *) * MAX_SG, > + sizeof(struct io_thread_req *) * MAX_SG > + ); > + if(n == -EAGAIN){ it is annoying when diff spilts things so oddly this closes the if n != sizeof in the old code? I guess it was supposed to read the rest of the req but if would read it at the beginning of the buffer? > continue; > } > - io_count++; > - do_io(req); > - n = os_write_file(kernel_fd, &req, > - sizeof(struct io_thread_req *)); > - if(n != sizeof(struct io_thread_req *)) > + if(n < 0){ > + printk("io_thread - read failed, fd = %d, " > + "err = %d\n", kernel_fd, -n); > + } Do you get a gain here? to try to read more than one pointer at a time also you are testing for multiple of the size of the pointer? > + if(n % sizeof(struct io_thread_req *) != 0){ > + printk("io_thread - invalid read, fd = %d, " > + "read = %d\n", kernel_fd, n); > + continue; > + } > + > + rcount = n / sizeof(struct io_thread_req *); > + > + for (i = 0; i < rcount; i++) { > + > + io_count++; > + > + req = * (kernel_reqs + i); > + > + do_io(req); > + > + } > + > + buffer = (unsigned char *) kernel_reqs; > + > + while (n> 0) { > + i = os_write_file(kernel_fd, buffer, n); > + if(i>= 0){ > + buffer = buffer + i; > + n = n - i; > + } else { // We would block try again later > + if(i != -EAGAIN) > + break; > + } > + } > + > + if(n> 0) > printk("io_thread - write failed, fd = %d, err = %d\n", > - kernel_fd, -n); > + kernel_fd, -i); > } > > return 0; > diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c > index e376f9b..3abec4f 100644 > --- a/arch/um/drivers/ubd_user.c > +++ b/arch/um/drivers/ubd_user.c > @@ -17,10 +17,22 @@ > #include <sys/param.h> > #include <endian.h> > #include <byteswap.h> > +#include <poll.h> > > #include "ubd.h" > #include <os.h> > > +void setup_ubd_pollfd(void * fds, int fd) { > + struct pollfd * pfds = (struct pollfd *) fds; > + pfds->fd = fd; > + pfds->events = POLLIN | POLLPRI; > + pfds->revents = 0; > +} > + > +int size_of_pollfd(void) { > + return sizeof(struct pollfd); > +} > + > int start_io_thread(unsigned long sp, int *fd_out) > { > int pid, fds[2], err; > @@ -36,10 +48,15 @@ int start_io_thread(unsigned long sp, int *fd_out) > > err = os_set_fd_block(*fd_out, 0); > if (err) { > - printk("start_io_thread - failed to set nonblocking I/O.\n"); > + printk("start_io_thread - failed to set nonblocking I/O - kernel_fd.\n"); > goto out_close; > } > > + err = os_set_fd_block(kernel_fd, 0); > + if (err) { > + printk("start_io_thread - failed to set nonblocking I/O - user_fd.\n"); > + goto out_close; > + } > pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL); > if(pid < 0){ > err = -errno; > diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h > index 7a04ddd..0b75efa 100644 > --- a/arch/um/include/shared/os.h > +++ b/arch/um/include/shared/os.h > @@ -149,6 +149,7 @@ extern int os_file_size(const char *file, unsigned long > long *size_out); > extern int os_pread_file(int fd, void *buf, int len, unsigned long long > offset); > extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long > long offset); > extern int os_file_modtime(const char *file, unsigned long *modtime); > +extern int os_poll(void *fds, unsigned int nfds, int timeout); > extern int os_pipe(int *fd, int stream, int close_on_exec); > extern int os_set_fd_async(int fd); > extern int os_clear_fd_async(int fd); > diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c > index 2db18cb..726b1e1 100644 > --- a/arch/um/os-Linux/file.c > +++ b/arch/um/os-Linux/file.c > @@ -14,6 +14,7 @@ > #include <sys/stat.h> > #include <sys/un.h> > #include <sys/types.h> > +#include <poll.h> > #include <os.h> > > static void copy_stat(struct uml_stat *dst, const struct stat64 *src) > @@ -355,6 +356,17 @@ int os_file_modtime(const char *file, unsigned long > *modtime) > return 0; > } > > +int os_poll(void *fds, unsigned int nfds, int timeout) > +{ > + int n; > + > + CATCH_EINTR(n = poll((struct pollfd *) fds, nfds, timeout)); > + if (n < 0) > + return -errno; > + > + return n; > +} > + > int os_set_exec_close(int fd) > { > int err; ------------------------------------------------------------------------------ Site24x7 APM Insight: Get Deep Visibility into Application Performance APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month Monitor end-to-end web transactions and take corrective actions now Troubleshoot faster and improve end-user experience. Signup Now! http://pubads.g.doubleclick.net/gampad/clk?id=267308311&iu=/4140 _______________________________________________ User-mode-linux-devel mailing list User-mode-linux-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel