This works cleanly and is understandable (something I would not say about the original version I wrote a couple of years back).
It emits some minor nags on shutdown related to cleaning up the term descriptors, but as far as I can see they are mostly harmless. I suspect that once I do an incremental on top to enable write IRQ semantics in the line.c family of drivers these will go away naturally. UBD tests out to 15% + faster, net is also faster even if you have one device. If you have let's say 20-30 devices, the speed difference becomes more substantial even without allocating different IRQs to different network devices. A. On 08/11/15 22:50, Anton Ivanov wrote: > Epoll based interrupt controller. > > IMPROVES: IO loop performance - no per fd lookups, allowing for > 15% IO speedup in minimal config going to 100s of % with many > devices - a N^N lookup is now replaced by a log(N) > > ADDS: True Write IRQ functionality > > OBSOLETES: The need to call reactivate_fd() in any driver which > has only read IRQ semantics. Write IRQs work, but will need to > be updated to use this fully. > > Potentially (with a change in API) will allow both edge and level > IRQ semantics. > > Pre-requisite for using packet mmap and multipacket read/write > which do not get along with poll() very well. 
> > Signed-off-by: Anton Ivanov <aiva...@brocade.com> > --- > arch/um/drivers/line.c | 5 +- > arch/um/drivers/mconsole_kern.c | 2 - > arch/um/drivers/net_kern.c | 1 - > arch/um/drivers/port_kern.c | 1 - > arch/um/drivers/random.c | 1 - > arch/um/drivers/ubd_kern.c | 1 - > arch/um/include/shared/irq_user.h | 24 ++- > arch/um/include/shared/os.h | 13 +- > arch/um/kernel/irq.c | 412 > ++++++++++++++++++++++---------------- > arch/um/os-Linux/irq.c | 145 +++++--------- > 10 files changed, 321 insertions(+), 284 deletions(-) > > diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c > index 6208702..84384c8 100644 > --- a/arch/um/drivers/line.c > +++ b/arch/um/drivers/line.c > @@ -1,4 +1,5 @@ > /* > + * Copyright (C) 2012 - 2014 Cisco Systems > * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > * Licensed under the GPL > */ > @@ -283,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct > line *line, void *data) > if (err) > return err; > if (output) > - err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, > + err = um_request_irq(driver->write_irq, fd, IRQ_NONE, > line_write_interrupt, IRQF_SHARED, > driver->write_irq_name, data); > return err; > @@ -666,8 +667,6 @@ static irqreturn_t winch_interrupt(int irq, void *data) > tty_kref_put(tty); > } > out: > - if (winch->fd != -1) > - reactivate_fd(winch->fd, WINCH_IRQ); > return IRQ_HANDLED; > } > > diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c > index 29880c9..5e8881c 100644 > --- a/arch/um/drivers/mconsole_kern.c > +++ b/arch/um/drivers/mconsole_kern.c > @@ -95,7 +95,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id) > } > if (!list_empty(&mc_requests)) > schedule_work(&mconsole_work); > - reactivate_fd(fd, MCONSOLE_IRQ); > return IRQ_HANDLED; > } > > @@ -243,7 +242,6 @@ void mconsole_stop(struct mc_request *req) > (*req->cmd->handler)(req); > } > os_set_fd_block(req->originating_fd, 0); > - reactivate_fd(req->originating_fd, 
MCONSOLE_IRQ); > mconsole_reply(req, "", 0, 0); > } > > diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c > index f70dd54..82ea3a2 100644 > --- a/arch/um/drivers/net_kern.c > +++ b/arch/um/drivers/net_kern.c > @@ -137,7 +137,6 @@ static irqreturn_t uml_net_interrupt(int irq, void > *dev_id) > schedule_work(&lp->work); > goto out; > } > - reactivate_fd(lp->fd, UM_ETH_IRQ); > > out: > spin_unlock(&lp->lock); > diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c > index 40ca5cc..b0e9ff3 100644 > --- a/arch/um/drivers/port_kern.c > +++ b/arch/um/drivers/port_kern.c > @@ -137,7 +137,6 @@ static void port_work_proc(struct work_struct *unused) > if (!port->has_connection) > continue; > > - reactivate_fd(port->fd, ACCEPT_IRQ); > while (port_accept(port)) > ; > port->has_connection = 0; > diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c > index dd16c90..a392828 100644 > --- a/arch/um/drivers/random.c > +++ b/arch/um/drivers/random.c > @@ -72,7 +72,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user > *buf, size_t size, > return ret ? 
: -EAGAIN; > > atomic_inc(&host_sleep_count); > - reactivate_fd(random_fd, RANDOM_IRQ); > add_sigio_fd(random_fd); > > add_wait_queue(&host_read_wait, &wait); > diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c > index e8ab93c..731982c 100644 > --- a/arch/um/drivers/ubd_kern.c > +++ b/arch/um/drivers/ubd_kern.c > @@ -466,7 +466,6 @@ static void ubd_handler(void) > blk_end_request(req->req, 0, req->length); > kfree(req); > } > - reactivate_fd(thread_fd, UBD_IRQ); > > list_for_each_safe(list, next_ele, &restart){ > ubd = container_of(list, struct ubd, restart); > diff --git a/arch/um/include/shared/irq_user.h > b/arch/um/include/shared/irq_user.h > index df56330..0eca64c 100644 > --- a/arch/um/include/shared/irq_user.h > +++ b/arch/um/include/shared/irq_user.h > @@ -1,4 +1,5 @@ > /* > + * Copyright (C) 2012 - 2014 Cisco Systems > * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > * Licensed under the GPL > */ > @@ -9,16 +10,23 @@ > #include <sysdep/ptrace.h> > > struct irq_fd { > - struct irq_fd *next; > - void *id; > - int fd; > - int type; > - int irq; > - int events; > - int current_events; > + void *id; > + int irq; > + int events; > +}; > + > + > +#define IRQ_READ 0 > +#define IRQ_WRITE 1 > +#define IRQ_NONE 2 > +#define MAX_IRQ_TYPE (IRQ_NONE + 1) > + > +struct irq_entry { > + struct irq_entry *next; > + int fd; > + struct irq_fd * irq_array[MAX_IRQ_TYPE + 1]; > }; > > -enum { IRQ_READ, IRQ_WRITE }; > > struct siginfo; > extern void sigio_handler(int sig, struct siginfo *unused_si, struct > uml_pt_regs *regs); > diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h > index 21d704b..3fe1249 100644 > --- a/arch/um/include/shared/os.h > +++ b/arch/um/include/shared/os.h > @@ -1,5 +1,6 @@ > /* > * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk}) > + * Copyright (C) 2012 - 2014 Cisco Systems > * Copyright (C) 2015 Thomas Meyer (tho...@m3y3r.de) > * Copyright (C) 2002 - 2007 Jeff Dike 
(jdike@{addtoit,linux.intel}.com) > * Licensed under the GPL > @@ -284,15 +285,17 @@ extern void halt_skas(void); > extern void reboot_skas(void); > > /* irq.c */ > -extern int os_waiting_for_events(struct irq_fd *active_fds); > -extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int > size_tmpfds); > + > +extern int os_setup_epoll(int maxevents); > +extern int os_waiting_for_events_epoll(void *kernel_events, int maxevents); > +extern int os_add_epoll_fd (int events, int fd, void * data); > +extern int os_mod_epoll_fd (int events, int fd, void * data); > +extern int os_del_epoll_fd (int fd); > + > extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void > *arg, > struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2); > extern void os_free_irq_later(struct irq_fd *active_fds, > int irq, void *dev_id); > -extern int os_get_pollfd(int i); > -extern void os_set_pollfd(int i, int fd); > -extern void os_set_ioignore(void); > > /* sigio.c */ > extern int add_sigio_fd(int fd); > diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c > index 23cb935..516b13b 100644 > --- a/arch/um/kernel/irq.c > +++ b/arch/um/kernel/irq.c > @@ -1,4 +1,7 @@ > /* > + * Copyright (C) 2015 Brocade Communications Ltd > + * Author: Anton Ivanov aivanov@{brocade.com,kot-begemot.co.uk} > + * Copyright (C) 2012 - 2014 Cisco Systems > * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > * Licensed under the GPL > * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: > @@ -18,6 +21,61 @@ > #include <os.h> > > /* > +* We are on the "kernel side" so we cannot pick up the sys/epoll.h > +* So we lift out of it the applicable key definitions. 
> +*/ > + > + > +enum EPOLL_EVENTS > + { > + EPOLLIN = 0x001, > +#define EPOLLIN EPOLLIN > + EPOLLPRI = 0x002, > +#define EPOLLPRI EPOLLPRI > + EPOLLOUT = 0x004, > +#define EPOLLOUT EPOLLOUT > + EPOLLRDNORM = 0x040, > +#define EPOLLRDNORM EPOLLRDNORM > + EPOLLRDBAND = 0x080, > +#define EPOLLRDBAND EPOLLRDBAND > + EPOLLWRNORM = 0x100, > +#define EPOLLWRNORM EPOLLWRNORM > + EPOLLWRBAND = 0x200, > +#define EPOLLWRBAND EPOLLWRBAND > + EPOLLMSG = 0x400, > +#define EPOLLMSG EPOLLMSG > + EPOLLERR = 0x008, > +#define EPOLLERR EPOLLERR > + EPOLLHUP = 0x010, > +#define EPOLLHUP EPOLLHUP > + EPOLLRDHUP = 0x2000, > +#define EPOLLRDHUP EPOLLRDHUP > + EPOLLONESHOT = (1 << 30), > +#define EPOLLONESHOT EPOLLONESHOT > + EPOLLET = (1 << 31) > +#define EPOLLET EPOLLET > + }; > + > + > +typedef union epoll_data > +{ > + void *ptr; > + int fd; > + uint32_t u32; > + uint64_t u64; > +} epoll_data_t; > + > +struct epoll_event > +{ > + uint32_t events; /* Epoll events */ > + epoll_data_t data; /* User data variable */ > +} __attribute__ ((__packed__)); > + > +#define MAX_EPOLL_EVENTS 16 > + > +static struct epoll_event epoll_events[MAX_EPOLL_EVENTS]; > + > +/* > * This list is accessed under irq_lock, except in sigio_handler, > * where it is safe from being modified. IRQ handlers won't change it - > * if an IRQ source has vanished, it will be freed by free_irqs just > @@ -25,44 +83,91 @@ > * list of irqs to free, with its own locking, coming back here to > * remove list elements, taking the irq_lock to do so. > */ > -static struct irq_fd *active_fds = NULL; > -static struct irq_fd **last_irq_ptr = &active_fds; > +static struct irq_entry *active_fds = NULL; > > extern void free_irqs(void); > > + > +static DEFINE_SPINLOCK(irq_lock); > + > + > +/* > + * Principles of Operation: > + * Each Epoll structure contains a pointer pointing back to an array > + * with irq entries for read, write and none and their matching event > + * masks. 
> + * This allows us to stop looking up "who talked" > + * We no longer need to enable/disable any polls while we process them > + * epoll will take care of that. The exemption to this (for now) are > + * character devices because of their own internal buffering, which > + * needs to be updated to leverage the new write IRQ semantics. > + * We can now support both read and write IRQs and have separate IRQs > + * for read and write ops. > + */ > + > + > void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs > *regs) > { > struct irq_fd *irq_fd; > - int n; > + struct irq_entry *irq_entry; > + unsigned long flags; > + > + int n, i, j; > > while (1) { > - n = os_waiting_for_events(active_fds); > - if (n <= 0) { > - if (n == -EINTR) > - continue; > - else break; > - } > > - for (irq_fd = active_fds; irq_fd != NULL; > - irq_fd = irq_fd->next) { > - if (irq_fd->current_events != 0) { > - irq_fd->current_events = 0; > - do_IRQ(irq_fd->irq, regs); > - } > + spin_lock_irqsave(&irq_lock, flags); > + > + n = os_waiting_for_events_epoll( > + &epoll_events, MAX_EPOLL_EVENTS > + ); > + > + > + if (n <= 0) { > + if (n == -EINTR) { continue; } > + else { break; } > } > + > + > + for (i = 0; i < n ; i++) { > + /* start from the data ptr, walk the tree branch */ > + irq_entry = (struct irq_entry *) > epoll_events[i].data.ptr; > + for (j = 0; j < MAX_IRQ_TYPE ; j ++ ) { > + irq_fd = irq_entry->irq_array[j]; > + if (irq_fd != NULL) { > + if (epoll_events[i].events & > irq_fd->events) { > + do_IRQ(irq_fd->irq, regs); > + } > + } > + } > + } > + spin_unlock_irqrestore(&irq_lock, flags); > } > > free_irqs(); > } > > -static DEFINE_SPINLOCK(irq_lock); > +static int update_events(struct irq_entry * irq_entry) { > + int i; > + int events = 0; > + struct irq_fd * irq_fd; > + for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) { > + irq_fd = irq_entry->irq_array[i]; > + if (irq_fd != NULL) { > + events = irq_fd->events | events; > + } > + } > + /* os_add_epoll will call os_mod_epoll if 
this already exists */ > + return os_add_epoll_fd(events, irq_entry->fd, irq_entry); > +} > + > > static int activate_fd(int irq, int fd, int type, void *dev_id) > { > - struct pollfd *tmp_pfd; > - struct irq_fd *new_fd, *irq_fd; > + struct irq_fd *new_fd; > + struct irq_entry * irq_entry; > unsigned long flags; > - int events, err, n; > + int i, err, events; > > err = os_set_fd_async(fd); > if (err < 0) > @@ -74,186 +179,150 @@ static int activate_fd(int irq, int fd, int type, void > *dev_id) > goto out; > > if (type == IRQ_READ) > - events = UM_POLLIN | UM_POLLPRI; > - else events = UM_POLLOUT; > - *new_fd = ((struct irq_fd) { .next = NULL, > - .id = dev_id, > - .fd = fd, > - .type = type, > - .irq = irq, > - .events = events, > - .current_events = 0 } ); > - > - err = -EBUSY; > - spin_lock_irqsave(&irq_lock, flags); > - for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { > - if ((irq_fd->fd == fd) && (irq_fd->type == type)) { > - printk(KERN_ERR "Registering fd %d twice\n", fd); > - printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq); > - printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id, > - dev_id); > - goto out_unlock; > - } > - } > - > + events |= EPOLLIN | EPOLLPRI; > if (type == IRQ_WRITE) > - fd = -1; > + events |= EPOLLOUT; > > - tmp_pfd = NULL; > - n = 0; > + *new_fd = ((struct irq_fd) { > + .id = dev_id, > + .irq = irq, > + .events = events > + }); > > - while (1) { > - n = os_create_pollfd(fd, events, tmp_pfd, n); > - if (n == 0) > - break; > + err = -EBUSY; > > - /* > - * n > 0 > - * It means we couldn't put new pollfd to current pollfds > - * and tmp_fds is NULL or too small for new pollfds array. > - * Needed size is equal to n as minimum. > - * > - * Here we have to drop the lock in order to call > - * kmalloc, which might sleep. > - * If something else came in and changed the pollfds array > - * so we will not be able to put new pollfd struct to pollfds > - * then we free the buffer tmp_fds and try again. 
> - */ > - spin_unlock_irqrestore(&irq_lock, flags); > - kfree(tmp_pfd); > + spin_lock_irqsave(&irq_lock, flags); > > - tmp_pfd = kmalloc(n, GFP_KERNEL); > - if (tmp_pfd == NULL) > - goto out_kfree; > + for (irq_entry = active_fds; irq_entry != NULL; irq_entry = > irq_entry->next) { > + if (irq_entry->fd == fd) break; > + } > > - spin_lock_irqsave(&irq_lock, flags); > + if (irq_entry == NULL) { > + irq_entry = kmalloc(sizeof(struct irq_entry), GFP_KERNEL); > + if (irq_entry == NULL) { > + printk(KERN_ERR > + "Failed to allocate new IRQ entry\n"); > + kfree(new_fd); > + goto out; > + } > + irq_entry->fd = fd; > + for (i = 0; i < MAX_IRQ_TYPE; i++) { > + irq_entry->irq_array[i] = NULL; > + } > + irq_entry->next = active_fds; > + active_fds = irq_entry; > } > > - *last_irq_ptr = new_fd; > - last_irq_ptr = &new_fd->next; > + if (irq_entry->irq_array[type] != NULL) { > + printk(KERN_ERR > + "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n", > + irq, fd, type, dev_id > + ); > + goto out_unlock; > + } else { > + irq_entry->irq_array[type] = new_fd; > + } > > + update_events(irq_entry); > + > spin_unlock_irqrestore(&irq_lock, flags); > > - /* > - * This calls activate_fd, so it has to be outside the critical > - * section. 
- */ > - maybe_sigio_broken(fd, (type == IRQ_READ)); > + maybe_sigio_broken(fd, (type != IRQ_NONE)); > > return 0; > > out_unlock: > spin_unlock_irqrestore(&irq_lock, flags); > - out_kfree: > kfree(new_fd); > out: > return err; > } > > -static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) > -{ > - unsigned long flags; > - > - spin_lock_irqsave(&irq_lock, flags); > - os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); > - spin_unlock_irqrestore(&irq_lock, flags); > -} > - > -struct irq_and_dev { > - int irq; > - void *dev; > -}; > > -static int same_irq_and_dev(struct irq_fd *irq, void *d) > +static void do_free_by_irq_and_dev( > + struct irq_entry* irq_entry, > + unsigned int irq, > + void * dev > +) > { > - struct irq_and_dev *data = d; > - > - return ((irq->irq == data->irq) && (irq->id == data->dev)); > + int i; > + struct irq_fd * to_free; > + for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) { > + if (irq_entry->irq_array[i] != NULL) { > + if ( > + (irq_entry->irq_array[i]->irq == irq) && > + (irq_entry->irq_array[i]->id == dev) > + ) { > + to_free = irq_entry->irq_array[i]; > + irq_entry->irq_array[i] = NULL; > + update_events(irq_entry); > + kfree(to_free); > + } > + } > + } > } > > -static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) > -{ > - struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, > - .dev = dev }); > +void free_irq_by_fd(int fd) { > > - free_irq_by_cb(same_irq_and_dev, &data); > -} > + struct irq_entry *irq_entry, *prev = NULL; > + unsigned long flags; > + int i; > > -static int same_fd(struct irq_fd *irq, void *fd) > -{ > - return (irq->fd == *((int *)fd)); > + spin_lock_irqsave(&irq_lock, flags); > + for (irq_entry = active_fds; irq_entry != NULL; irq_entry = > irq_entry->next) { > + if (irq_entry->fd == fd) { > + os_del_epoll_fd(fd); /* ignore err, just do it */ > + for (i = 0; i < MAX_IRQ_TYPE ; i++) { > + if (irq_entry->irq_array[i] != NULL) { > + kfree(irq_entry->irq_array[i]); > + } > 
+ } > + if (prev == NULL) { > + active_fds = irq_entry->next; > + } else { > + prev->next = irq_entry->next; > + } > + kfree(irq_entry); > + } else { > + prev = irq_entry; > + } > + } > + spin_unlock_irqrestore(&irq_lock, flags); > + > } > > -void free_irq_by_fd(int fd) > -{ > - free_irq_by_cb(same_fd, &fd); > -} > > -/* Must be called with irq_lock held */ > -static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) > -{ > - struct irq_fd *irq; > - int i = 0; > - int fdi; > - > - for (irq = active_fds; irq != NULL; irq = irq->next) { > - if ((irq->fd == fd) && (irq->irq == irqnum)) > - break; > - i++; > - } > - if (irq == NULL) { > - printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n", > - fd); > - goto out; > - } > - fdi = os_get_pollfd(i); > - if ((fdi != -1) && (fdi != fd)) { > - printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds " > - "and pollfds, fd %d vs %d, need %d\n", irq->fd, > - fdi, fd); > - irq = NULL; > - goto out; > - } > - *index_out = i; > - out: > - return irq; > -} > +static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { > > -void reactivate_fd(int fd, int irqnum) > -{ > - struct irq_fd *irq; > + struct irq_entry *irq_entry; > unsigned long flags; > - int i; > > spin_lock_irqsave(&irq_lock, flags); > - irq = find_irq_by_fd(fd, irqnum, &i); > - if (irq == NULL) { > - spin_unlock_irqrestore(&irq_lock, flags); > - return; > + for (irq_entry = active_fds; irq_entry != NULL; irq_entry = > irq_entry->next) { > + do_free_by_irq_and_dev(irq_entry, irq, dev); > } > - os_set_pollfd(i, irq->fd); > spin_unlock_irqrestore(&irq_lock, flags); > - > - add_sigio_fd(fd); > + > } > > -void deactivate_fd(int fd, int irqnum) > + > +void reactivate_fd(int fd, int irqnum) > { > - struct irq_fd *irq; > + struct irq_entry *irq_entry; > unsigned long flags; > - int i; > - > spin_lock_irqsave(&irq_lock, flags); > - irq = find_irq_by_fd(fd, irqnum, &i); > - if (irq == NULL) { > - spin_unlock_irqrestore(&irq_lock, flags); > - 
return; > + for (irq_entry = active_fds; irq_entry != NULL; irq_entry = > irq_entry->next) { > + if (irq_entry->fd == fd) { > + update_events(irq_entry); > + } > } > - > - os_set_pollfd(i, -1); > spin_unlock_irqrestore(&irq_lock, flags); > + > +} > > - ignore_sigio_fd(fd); > +void deactivate_fd(int fd, int irqnum) > +{ > + os_del_epoll_fd(fd); /* ignore err, just do it */ > } > EXPORT_SYMBOL(deactivate_fd); > > @@ -265,17 +334,16 @@ EXPORT_SYMBOL(deactivate_fd); > */ > int deactivate_all_fds(void) > { > - struct irq_fd *irq; > + struct irq_entry * irq_entry; > int err; > > - for (irq = active_fds; irq != NULL; irq = irq->next) { > - err = os_clear_fd_async(irq->fd); > - if (err) > - return err; > + for (irq_entry = active_fds; irq_entry != NULL; irq_entry = > irq_entry->next) { > + os_del_epoll_fd(irq_entry->fd); /* ignore err, just do it */ > + err = os_clear_fd_async(irq_entry->fd); > + if (err) { > + printk(KERN_ERR "Clear FD async failed with %d", err); > + } > } > - /* If there is a signal already queued, after unblocking ignore it */ > - os_set_ioignore(); > - > return 0; > } > > @@ -308,13 +376,13 @@ int um_request_irq(unsigned int irq, int fd, int type, > { > int err; > > - if (fd != -1) { > + err = request_irq(irq, handler, irqflags, devname, dev_id); > + > + if ((!err) && (fd != -1)) { > err = activate_fd(irq, fd, type, dev_id); > - if (err) > - return err; > } > > - return request_irq(irq, handler, irqflags, devname, dev_id); > + return err; > } > > EXPORT_SYMBOL(um_request_irq); > @@ -352,9 +420,9 @@ void __init init_IRQ(void) > int i; > > irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, > handle_edge_irq); > - > - for (i = 1; i < NR_IRQS; i++) > + for (i = 1; i < NR_IRQS - 1 ; i++) > irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); > + os_setup_epoll(MAX_EPOLL_EVENTS); > } > > /* > @@ -382,11 +450,11 @@ void __init init_IRQ(void) > * thread_info. 
> * > * There are three cases - > - * The first interrupt on the stack - sets up the thread_info and > + * The first interrupt on the stack - sets up the thread_info and > * handles the interrupt > - * A nested interrupt interrupting the copying of the thread_info - > + * A nested interrupt interrupting the copying of the thread_info - > * can't handle the interrupt, as the stack is in an unknown state > - * A nested interrupt not interrupting the copying of the > + * A nested interrupt not interrupting the copying of the > * thread_info - doesn't do any setup, just handles the interrupt > * > * The first job is to figure out whether we interrupted stack setup. > diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c > index b9afb74..837aa68 100644 > --- a/arch/um/os-Linux/irq.c > +++ b/arch/um/os-Linux/irq.c > @@ -1,4 +1,5 @@ > /* > + * Copyright (C) 2012 - 2014 Cisco Systems > * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) > * Licensed under the GPL > */ > @@ -6,6 +7,7 @@ > #include <stdlib.h> > #include <errno.h> > #include <poll.h> > +#include <sys/epoll.h> > #include <signal.h> > #include <string.h> > #include <irq_user.h> > @@ -16,117 +18,80 @@ > * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd > * and os_free_irq_by_cb, which are called under irq_lock. 
> */ > -static struct pollfd *pollfds = NULL; > -static int pollfds_num = 0; > -static int pollfds_size = 0; > > -int os_waiting_for_events(struct irq_fd *active_fds) > +/* epoll support */ > + > + > +static int epollfd = -1; > + > +int os_setup_epoll(int maxevents) { > + epollfd = epoll_create(maxevents); > + return epollfd; > +} > + > +int os_waiting_for_events_epoll(void *kernel_events, int maxevents) > { > - struct irq_fd *irq_fd; > - int i, n, err; > + int n, err; > > - n = poll(pollfds, pollfds_num, 0); > + n = epoll_wait(epollfd, > + (struct epoll_event *) kernel_events, maxevents, 0); > if (n < 0) { > err = -errno; > if (errno != EINTR) > - printk(UM_KERN_ERR "os_waiting_for_events:" > - " poll returned %d, errno = %d\n", n, errno); > + printk( > + UM_KERN_ERR "os_waiting_for_events:" > + " poll returned %d, error = %s\n", n, > + strerror(errno) > + ); > return err; > } > > - if (n == 0) > - return 0; > - > - irq_fd = active_fds; > - > - for (i = 0; i < pollfds_num; i++) { > - if (pollfds[i].revents != 0) { > - irq_fd->current_events = pollfds[i].revents; > - pollfds[i].fd = -1; > - } > - irq_fd = irq_fd->next; > - } > return n; > } > > -int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds) > -{ > - if (pollfds_num == pollfds_size) { > - if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) { > - /* return min size needed for new pollfds area */ > - return (pollfds_size + 1) * sizeof(pollfds[0]); > - } > - > - if (pollfds != NULL) { > - memcpy(tmp_pfd, pollfds, > - sizeof(pollfds[0]) * pollfds_size); > - /* remove old pollfds */ > - kfree(pollfds); > - } > - pollfds = tmp_pfd; > - pollfds_size++; > - } else > - kfree(tmp_pfd); /* remove not used tmp_pfd */ > +int os_add_epoll_fd (int events, int fd, void * data) { > + struct epoll_event event; > + int result; > > - pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, > - .events = events, > - .revents = 0 }); > - pollfds_num++; > - > - return 0; > + event.data.ptr = data; > + event.events 
= events | EPOLLET; > + result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event); > + if ((result) && (errno == EEXIST)) { > + result = os_mod_epoll_fd (events, fd, data); > + } > + if (result) { > + printk("epollctl add err fd %d, %s\n", fd, strerror(errno)); > + } > + return result; > } > > -void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, > - struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) > -{ > - struct irq_fd **prev; > - int i = 0; > - > - prev = &active_fds; > - while (*prev != NULL) { > - if ((*test)(*prev, arg)) { > - struct irq_fd *old_fd = *prev; > - if ((pollfds[i].fd != -1) && > - (pollfds[i].fd != (*prev)->fd)) { > - printk(UM_KERN_ERR "os_free_irq_by_cb - " > - "mismatch between active_fds and " > - "pollfds, fd %d vs %d\n", > - (*prev)->fd, pollfds[i].fd); > - goto out; > - } > - > - pollfds_num--; > - > - /* > - * This moves the *whole* array after pollfds[i] > - * (though it doesn't spot as such)! > - */ > - memmove(&pollfds[i], &pollfds[i + 1], > - (pollfds_num - i) * sizeof(pollfds[0])); > - if (*last_irq_ptr2 == &old_fd->next) > - *last_irq_ptr2 = prev; > - > - *prev = (*prev)->next; > - if (old_fd->type == IRQ_WRITE) > - ignore_sigio_fd(old_fd->fd); > - kfree(old_fd); > - continue; > - } > - prev = &(*prev)->next; > - i++; > +int os_mod_epoll_fd (int events, int fd, void * data) { > + struct epoll_event event; > + int result; > + event.data.ptr = data; > + event.events = events; > + result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event); > + if (result) { > + printk("epollctl mod err fd %d, %s\n", fd, strerror(errno)); > } > - out: > - return; > + return result; > } > > -int os_get_pollfd(int i) > -{ > - return pollfds[i].fd; > +int os_del_epoll_fd (int fd) { > + struct epoll_event event; > + int result; > + result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event); > + if (result) { > + printk("epollctl del err %s\n", strerror(errno)); > + } > + return result; > } > > -void os_set_pollfd(int i, int fd) > +void 
os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, > + struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) > { > - pollfds[i].fd = fd; > + printk("Someone invoking obsolete deactivate_by_CB!!!\n"); > + return; > } > > void os_set_ioignore(void) ------------------------------------------------------------------------------ _______________________________________________ User-mode-linux-devel mailing list User-mode-linux-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel