Epoll based interrupt controller.
IMPROVES: IO loop performance - no per-fd lookups, allowing for a
15% IO speedup in a minimal config, rising to 100s of % with many
devices - an O(N^2) lookup is now replaced by an O(log N) one
ADDS: True Write IRQ functionality
OBSOLETES: The need to call reactivate_fd() in any driver which
has only read IRQ semantics. Write IRQs work, but will need to
be updated to use this fully.
Potentially (with a change in API) will allow both edge and level
IRQ semantics.
Pre-requisite for using packet mmap and multipacket read/write
which do not get along with poll() very well.
Signed-off-by: Anton Ivanov <[email protected]>
---
arch/um/drivers/chan_kern.c | 10 +-
arch/um/drivers/line.c | 5 +-
arch/um/drivers/mconsole_kern.c | 2 -
arch/um/drivers/net_kern.c | 1 -
arch/um/drivers/port_kern.c | 1 -
arch/um/drivers/random.c | 1 -
arch/um/drivers/ubd_kern.c | 28 +--
arch/um/include/shared/irq_user.h | 24 ++-
arch/um/include/shared/os.h | 16 +-
arch/um/kernel/irq.c | 438 +++++++++++++++++++++++---------------
arch/um/kernel/sigio.c | 2 +-
arch/um/os-Linux/file.c | 19 ++
arch/um/os-Linux/irq.c | 150 ++++++-------
13 files changed, 382 insertions(+), 315 deletions(-)
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index acbe6c6..49717f6 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -244,14 +244,12 @@ void close_chan(struct line *line)
void deactivate_chan(struct chan *chan, int irq)
{
- if (chan && chan->enabled)
- deactivate_fd(chan->fd, irq);
+/* NOP with epoll controller */
}
void reactivate_chan(struct chan *chan, int irq)
{
- if (chan && chan->enabled)
- reactivate_fd(chan->fd, irq);
+/* NOP with epoll controller */
}
int write_chan(struct chan *chan, const char *buf, int len,
@@ -265,8 +263,6 @@ int write_chan(struct chan *chan, const char *buf, int len,
n = chan->ops->write(chan->fd, buf, len, chan->data);
if (chan->primary) {
ret = n;
- if ((ret == -EAGAIN) || ((ret >= 0) && (ret < len)))
- reactivate_fd(chan->fd, write_irq);
}
return ret;
}
@@ -564,8 +560,6 @@ void chan_interrupt(struct line *line, int irq)
tty_insert_flip_char(port, c, TTY_NORMAL);
} while (err > 0);
- if (err == 0)
- reactivate_fd(chan->fd, irq);
if (err == -EIO) {
if (chan->primary) {
tty_port_tty_hangup(&line->port, false);
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 6208702..84384c8 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2012 - 2014 Cisco Systems
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
@@ -283,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct
line *line, void *data)
if (err)
return err;
if (output)
- err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+ err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
line_write_interrupt, IRQF_SHARED,
driver->write_irq_name, data);
return err;
@@ -666,8 +667,6 @@ static irqreturn_t winch_interrupt(int irq, void *data)
tty_kref_put(tty);
}
out:
- if (winch->fd != -1)
- reactivate_fd(winch->fd, WINCH_IRQ);
return IRQ_HANDLED;
}
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 29880c9..5e8881c 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -95,7 +95,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id)
}
if (!list_empty(&mc_requests))
schedule_work(&mconsole_work);
- reactivate_fd(fd, MCONSOLE_IRQ);
return IRQ_HANDLED;
}
@@ -243,7 +242,6 @@ void mconsole_stop(struct mc_request *req)
(*req->cmd->handler)(req);
}
os_set_fd_block(req->originating_fd, 0);
- reactivate_fd(req->originating_fd, MCONSOLE_IRQ);
mconsole_reply(req, "", 0, 0);
}
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index f70dd54..82ea3a2 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -137,7 +137,6 @@ static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
schedule_work(&lp->work);
goto out;
}
- reactivate_fd(lp->fd, UM_ETH_IRQ);
out:
spin_unlock(&lp->lock);
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 40ca5cc..b0e9ff3 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -137,7 +137,6 @@ static void port_work_proc(struct work_struct *unused)
if (!port->has_connection)
continue;
- reactivate_fd(port->fd, ACCEPT_IRQ);
while (port_accept(port))
;
port->has_connection = 0;
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index dd16c90..a392828 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -72,7 +72,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user
*buf, size_t size,
return ret ? : -EAGAIN;
atomic_inc(&host_sleep_count);
- reactivate_fd(random_fd, RANDOM_IRQ);
add_sigio_fd(random_fd);
add_wait_queue(&host_read_wait, &wait);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index e8ab93c..ad24ac7 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -466,7 +466,6 @@ static void ubd_handler(void)
blk_end_request(req->req, 0, req->length);
kfree(req);
}
- reactivate_fd(thread_fd, UBD_IRQ);
list_for_each_safe(list, next_ele, &restart){
ubd = container_of(list, struct ubd, restart);
@@ -535,11 +534,7 @@ static int read_cow_bitmap(int fd, void *buf, int offset,
int len)
{
int err;
- err = os_seek_file(fd, offset);
- if (err < 0)
- return err;
-
- err = os_read_file(fd, buf, len);
+ err = os_pread_file(fd, buf, len, offset);
if (err < 0)
return err;
@@ -1377,14 +1372,8 @@ static int update_bitmap(struct io_thread_req *req)
if(req->cow_offset == -1)
return 0;
- n = os_seek_file(req->fds[1], req->cow_offset);
- if(n < 0){
- printk("do_io - bitmap lseek failed : err = %d\n", -n);
- return 1;
- }
-
- n = os_write_file(req->fds[1], &req->bitmap_words,
- sizeof(req->bitmap_words));
+ n = os_pwrite_file(req->fds[1], &req->bitmap_words,
+ sizeof(req->bitmap_words), req->cow_offset);
if(n != sizeof(req->bitmap_words)){
printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
req->fds[1]);
@@ -1399,7 +1388,6 @@ static void do_io(struct io_thread_req *req)
char *buf;
unsigned long len;
int n, nsectors, start, end, bit;
- int err;
__u64 off;
if (req->op == UBD_FLUSH) {
@@ -1428,18 +1416,12 @@ static void do_io(struct io_thread_req *req)
len = (end - start) * req->sectorsize;
buf = &req->buffer[start * req->sectorsize];
- err = os_seek_file(req->fds[bit], off);
- if(err < 0){
- printk("do_io - lseek failed : err = %d\n", -err);
- req->error = 1;
- return;
- }
if(req->op == UBD_READ){
n = 0;
do {
buf = &buf[n];
len -= n;
- n = os_read_file(req->fds[bit], buf, len);
+ n = os_pread_file(req->fds[bit], buf, len, off);
if (n < 0) {
printk("do_io - read failed, err = %d "
"fd = %d\n", -n, req->fds[bit]);
@@ -1449,7 +1431,7 @@ static void do_io(struct io_thread_req *req)
} while((n < len) && (n != 0));
if (n < len) memset(&buf[n], 0, len - n);
} else {
- n = os_write_file(req->fds[bit], buf, len);
+ n = os_pwrite_file(req->fds[bit], buf, len, off);
if(n != len){
printk("do_io - write failed err = %d "
"fd = %d\n", -n, req->fds[bit]);
diff --git a/arch/um/include/shared/irq_user.h
b/arch/um/include/shared/irq_user.h
index df56330..0eca64c 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2012 - 2014 Cisco Systems
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
@@ -9,16 +10,23 @@
#include <sysdep/ptrace.h>
struct irq_fd {
- struct irq_fd *next;
- void *id;
- int fd;
- int type;
- int irq;
- int events;
- int current_events;
+ void *id;
+ int irq;
+ int events;
+};
+
+
+#define IRQ_READ 0
+#define IRQ_WRITE 1
+#define IRQ_NONE 2
+#define MAX_IRQ_TYPE (IRQ_NONE + 1)
+
+struct irq_entry {
+ struct irq_entry *next;
+ int fd;
+ struct irq_fd * irq_array[MAX_IRQ_TYPE + 1];
};
-enum { IRQ_READ, IRQ_WRITE };
struct siginfo;
extern void sigio_handler(int sig, struct siginfo *unused_si, struct
uml_pt_regs *regs);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 21d704b..46daa6e 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
+ * Copyright (C) 2012 - 2014 Cisco Systems
* Copyright (C) 2015 Thomas Meyer ([email protected])
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
@@ -146,6 +147,8 @@ extern int os_read_file(int fd, void *buf, int len);
extern int os_write_file(int fd, const void *buf, int count);
extern int os_sync_file(int fd);
extern int os_file_size(const char *file, unsigned long long *size_out);
+extern int os_pread_file(int fd, void *buf, int len, unsigned long long
offset);
+extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long
long offset);
extern int os_file_modtime(const char *file, unsigned long *modtime);
extern int os_pipe(int *fd, int stream, int close_on_exec);
extern int os_set_fd_async(int fd);
@@ -284,15 +287,18 @@ extern void halt_skas(void);
extern void reboot_skas(void);
/* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int
size_tmpfds);
+
+extern int os_setup_epoll(int maxevents);
+extern int os_waiting_for_events_epoll(void *kernel_events, int maxevents);
+extern int os_add_epoll_fd (int events, int fd, void * data);
+extern int os_mod_epoll_fd (int events, int fd, void * data);
+extern int os_del_epoll_fd (int fd);
+
extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
extern void os_free_irq_later(struct irq_fd *active_fds,
int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
-extern void os_set_ioignore(void);
+extern void os_close_epoll(void);
/* sigio.c */
extern int add_sigio_fd(int fd);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb935..52effff 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,7 @@
/*
+ * Copyright (C) 2015 Brocade Communications Ltd
+ * Author: Anton Ivanov aivanov@{brocade.com,kot-begemot.co.uk}
+ * Copyright (C) 2012 - 2014 Cisco Systems
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
* Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -18,6 +21,61 @@
#include <os.h>
/*
+* We are on the "kernel side" so we cannot pick up the sys/epoll.h
+* So we lift out of it the applicable key definitions.
+*/
+
+
+enum EPOLL_EVENTS
+ {
+ EPOLLIN = 0x001,
+#define EPOLLIN EPOLLIN
+ EPOLLPRI = 0x002,
+#define EPOLLPRI EPOLLPRI
+ EPOLLOUT = 0x004,
+#define EPOLLOUT EPOLLOUT
+ EPOLLRDNORM = 0x040,
+#define EPOLLRDNORM EPOLLRDNORM
+ EPOLLRDBAND = 0x080,
+#define EPOLLRDBAND EPOLLRDBAND
+ EPOLLWRNORM = 0x100,
+#define EPOLLWRNORM EPOLLWRNORM
+ EPOLLWRBAND = 0x200,
+#define EPOLLWRBAND EPOLLWRBAND
+ EPOLLMSG = 0x400,
+#define EPOLLMSG EPOLLMSG
+ EPOLLERR = 0x008,
+#define EPOLLERR EPOLLERR
+ EPOLLHUP = 0x010,
+#define EPOLLHUP EPOLLHUP
+ EPOLLRDHUP = 0x2000,
+#define EPOLLRDHUP EPOLLRDHUP
+ EPOLLONESHOT = (1 << 30),
+#define EPOLLONESHOT EPOLLONESHOT
+    EPOLLET = (1u << 31)
+#define EPOLLET EPOLLET
+ };
+
+
+typedef union epoll_data
+{
+ void *ptr;
+ int fd;
+ uint32_t u32;
+ uint64_t u64;
+} epoll_data_t;
+
+struct epoll_event
+{
+ uint32_t events; /* Epoll events */
+ epoll_data_t data; /* User data variable */
+} __attribute__ ((__packed__));
+
+#define MAX_EPOLL_EVENTS 16
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/*
* This list is accessed under irq_lock, except in sigio_handler,
* where it is safe from being modified. IRQ handlers won't change it -
* if an IRQ source has vanished, it will be freed by free_irqs just
@@ -25,44 +83,111 @@
* list of irqs to free, with its own locking, coming back here to
* remove list elements, taking the irq_lock to do so.
*/
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
+static struct irq_entry *active_fds = NULL;
extern void free_irqs(void);
+
+static DEFINE_SPINLOCK(irq_lock);
+
+
+/*
+ * Principles of Operation:
+ * Each Epoll structure contains a pointer pointing back to an array
+ * with irq entries for read, write and none and their matching event
+ * masks.
+ * This allows us to stop looking up "who talked"
+ * We no longer need to enable/disable any polls while we process them
+ * epoll will take care of that. The exemption to this (for now) are
+ * character devices because of their own internal buffering, which
+ * needs to be updated to leverage the new write IRQ semantics.
+ * We can now support both read and write IRQs and have separate IRQs
+ * for read and write ops.
+ */
+
+/* For now this variable is for debug purposes, we will later re-use
+ * it for the multi-send/multi-write network FSM
+ */
+
+static int in_epoll_loop = 0;
+
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs
*regs)
{
struct irq_fd *irq_fd;
- int n;
+ struct irq_entry *irq_entry;
+ unsigned long flags;
+
+ int n, i, j;
+
+
while (1) {
- n = os_waiting_for_events(active_fds);
- if (n <= 0) {
- if (n == -EINTR)
- continue;
- else break;
+
+ if (!spin_trylock_irqsave(&irq_lock, flags)) {
+ break;
}
+ WARN_ON(in_epoll_loop);
+ in_epoll_loop = 1;
- for (irq_fd = active_fds; irq_fd != NULL;
- irq_fd = irq_fd->next) {
- if (irq_fd->current_events != 0) {
- irq_fd->current_events = 0;
- do_IRQ(irq_fd->irq, regs);
- }
+ n = os_waiting_for_events_epoll(
+ &epoll_events, MAX_EPOLL_EVENTS
+ );
+
+
+ if (n <= 0) {
+ in_epoll_loop = 0;
+ spin_unlock_irqrestore(&irq_lock, flags);
+ if (n == -EINTR) { continue; }
+ else { break; }
}
+
+
+ for (i = 0; i < n ; i++) {
+ /* start from the data ptr, walk the tree branch */
+ irq_entry = (struct irq_entry *)
epoll_events[i].data.ptr;
+ for (j = 0; j < MAX_IRQ_TYPE ; j ++ ) {
+ irq_fd = irq_entry->irq_array[j];
+ if (irq_fd != NULL) {
+ if (epoll_events[i].events &
irq_fd->events) {
+ do_IRQ(irq_fd->irq, regs);
+ }
+ }
+ }
+ }
+ in_epoll_loop = 0;
+ spin_unlock_irqrestore(&irq_lock, flags);
}
free_irqs();
}
-static DEFINE_SPINLOCK(irq_lock);
+static int update_events(struct irq_entry * irq_entry)
+{
+ int i;
+ int events = 0;
+ struct irq_fd * irq_fd;
+ for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) {
+ irq_fd = irq_entry->irq_array[i];
+ if (irq_fd != NULL) {
+ events = irq_fd->events | events;
+ }
+ }
+ if (events > 0) {
+ /* os_add_epoll will call os_mod_epoll if this already exists */
+ return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+ } else {
+ /* No events - delete, because EPOLL_ERR will always trigger */
+ return os_del_epoll_fd(irq_entry->fd);
+ }
+}
+
static int activate_fd(int irq, int fd, int type, void *dev_id)
{
- struct pollfd *tmp_pfd;
- struct irq_fd *new_fd, *irq_fd;
+ struct irq_fd *new_fd;
+ struct irq_entry * irq_entry;
+ int i, err, events;
unsigned long flags;
- int events, err, n;
err = os_set_fd_async(fd);
if (err < 0)
@@ -74,186 +199,155 @@ static int activate_fd(int irq, int fd, int type, void
*dev_id)
goto out;
if (type == IRQ_READ)
- events = UM_POLLIN | UM_POLLPRI;
- else events = UM_POLLOUT;
- *new_fd = ((struct irq_fd) { .next = NULL,
- .id = dev_id,
- .fd = fd,
- .type = type,
- .irq = irq,
- .events = events,
- .current_events = 0 } );
-
- err = -EBUSY;
- spin_lock_irqsave(&irq_lock, flags);
- for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
- if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
- printk(KERN_ERR "Registering fd %d twice\n", fd);
- printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
- printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
- dev_id);
- goto out_unlock;
- }
- }
-
+ events = EPOLLIN | EPOLLPRI;
if (type == IRQ_WRITE)
- fd = -1;
+ events = EPOLLOUT;
- tmp_pfd = NULL;
- n = 0;
+ *new_fd = ((struct irq_fd) {
+ .id = dev_id,
+ .irq = irq,
+ .events = events
+ });
- while (1) {
- n = os_create_pollfd(fd, events, tmp_pfd, n);
- if (n == 0)
- break;
+ err = -EBUSY;
- /*
- * n > 0
- * It means we couldn't put new pollfd to current pollfds
- * and tmp_fds is NULL or too small for new pollfds array.
- * Needed size is equal to n as minimum.
- *
- * Here we have to drop the lock in order to call
- * kmalloc, which might sleep.
- * If something else came in and changed the pollfds array
- * so we will not be able to put new pollfd struct to pollfds
- * then we free the buffer tmp_fds and try again.
- */
- spin_unlock_irqrestore(&irq_lock, flags);
- kfree(tmp_pfd);
+ spin_lock_irqsave(&irq_lock, flags);
+ WARN_ON(in_epoll_loop);
- tmp_pfd = kmalloc(n, GFP_KERNEL);
- if (tmp_pfd == NULL)
- goto out_kfree;
+ for (irq_entry = active_fds; irq_entry != NULL; irq_entry =
irq_entry->next) {
+ if (irq_entry->fd == fd) break;
+ }
- spin_lock_irqsave(&irq_lock, flags);
+ if (irq_entry == NULL) {
+ irq_entry = kmalloc(sizeof(struct irq_entry), GFP_KERNEL);
+ if (irq_entry == NULL) {
+ printk(KERN_ERR
+ "Failed to allocate new IRQ entry\n");
+ kfree(new_fd);
+ goto out_unlock;
+ }
+ irq_entry->fd = fd;
+ for (i = 0; i < MAX_IRQ_TYPE; i++) {
+ irq_entry->irq_array[i] = NULL;
+ }
+ irq_entry->next = active_fds;
+ active_fds = irq_entry;
}
- *last_irq_ptr = new_fd;
- last_irq_ptr = &new_fd->next;
+ if (irq_entry->irq_array[type] != NULL) {
+ printk(KERN_ERR
+ "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+ irq, fd, type, dev_id
+ );
+ goto out_unlock;
+ } else {
+ irq_entry->irq_array[type] = new_fd;
+ }
+ update_events(irq_entry);
+
spin_unlock_irqrestore(&irq_lock, flags);
- /*
- * This calls activate_fd, so it has to be outside the critical
- * section.
- */
- maybe_sigio_broken(fd, (type == IRQ_READ));
+ maybe_sigio_broken(fd, (type != IRQ_NONE));
return 0;
out_unlock:
spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
kfree(new_fd);
out:
return err;
}
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
-{
- unsigned long flags;
- spin_lock_irqsave(&irq_lock, flags);
- os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
- spin_unlock_irqrestore(&irq_lock, flags);
-}
-
-struct irq_and_dev {
- int irq;
- void *dev;
-};
-
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static void do_free_by_irq_and_dev(
+ struct irq_entry* irq_entry,
+ unsigned int irq,
+ void * dev
+)
{
- struct irq_and_dev *data = d;
-
- return ((irq->irq == data->irq) && (irq->id == data->dev));
-}
-
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
-{
- struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq,
- .dev = dev });
-
- free_irq_by_cb(same_irq_and_dev, &data);
-}
-
-static int same_fd(struct irq_fd *irq, void *fd)
-{
- return (irq->fd == *((int *)fd));
+ int i;
+ struct irq_fd * to_free;
+ for (i = 0; i < MAX_IRQ_TYPE ; i ++ ) {
+ if (irq_entry->irq_array[i] != NULL) {
+ if (
+ (irq_entry->irq_array[i]->irq == irq) &&
+ (irq_entry->irq_array[i]->id == dev)
+ ) {
+ to_free = irq_entry->irq_array[i];
+ irq_entry->irq_array[i] = NULL;
+ update_events(irq_entry);
+ kfree(to_free);
+ }
+ }
+ }
}
void free_irq_by_fd(int fd)
{
- free_irq_by_cb(same_fd, &fd);
-}
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
-{
- struct irq_fd *irq;
- int i = 0;
- int fdi;
+ struct irq_entry *irq_entry, *prev = NULL;
+ unsigned long flags;
+ int i;
- for (irq = active_fds; irq != NULL; irq = irq->next) {
- if ((irq->fd == fd) && (irq->irq == irqnum))
- break;
- i++;
- }
- if (irq == NULL) {
- printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
- fd);
- goto out;
- }
- fdi = os_get_pollfd(i);
- if ((fdi != -1) && (fdi != fd)) {
- printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
- "and pollfds, fd %d vs %d, need %d\n", irq->fd,
- fdi, fd);
- irq = NULL;
- goto out;
+ spin_lock_irqsave(&irq_lock, flags);
+ WARN_ON(in_epoll_loop);
+	irq_entry = active_fds;
+	while (irq_entry != NULL) {
+		struct irq_entry *next = irq_entry->next;
+		if (irq_entry->fd == fd) {
+			os_del_epoll_fd(fd); /* ignore err, just do it */
+			for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+				if (irq_entry->irq_array[i] != NULL) {
+					kfree(irq_entry->irq_array[i]);
+				}
+			}
+			if (prev == NULL) {
+				active_fds = next;
+			} else {
+				prev->next = next;
+			}
+			kfree(irq_entry);
+		} else {
+			prev = irq_entry;
+		}
+		irq_entry = next;
+	}
- *index_out = i;
- out:
- return irq;
+ spin_unlock_irqrestore(&irq_lock, flags);
+
}
-void reactivate_fd(int fd, int irqnum)
+
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
- struct irq_fd *irq;
+
+ struct irq_entry *irq_entry;
unsigned long flags;
- int i;
spin_lock_irqsave(&irq_lock, flags);
- irq = find_irq_by_fd(fd, irqnum, &i);
- if (irq == NULL) {
- spin_unlock_irqrestore(&irq_lock, flags);
- return;
+ for (irq_entry = active_fds; irq_entry != NULL; irq_entry =
irq_entry->next) {
+ do_free_by_irq_and_dev(irq_entry, irq, dev);
}
- os_set_pollfd(i, irq->fd);
spin_unlock_irqrestore(&irq_lock, flags);
-
- add_sigio_fd(fd);
+
}
-void deactivate_fd(int fd, int irqnum)
-{
- struct irq_fd *irq;
- unsigned long flags;
- int i;
- spin_lock_irqsave(&irq_lock, flags);
- irq = find_irq_by_fd(fd, irqnum, &i);
- if (irq == NULL) {
- spin_unlock_irqrestore(&irq_lock, flags);
- return;
+void reactivate_fd(int fd, int irqnum)
+{
+ /* this is always called from interrupt context we
+ * should not need to lock here
+ */
+ struct irq_entry *irq_entry;
+ WARN_ON(!in_epoll_loop);
+ for (irq_entry = active_fds; irq_entry != NULL; irq_entry =
irq_entry->next) {
+ if (irq_entry->fd == fd) {
+ update_events(irq_entry);
+ }
}
+
+}
- os_set_pollfd(i, -1);
- spin_unlock_irqrestore(&irq_lock, flags);
-
- ignore_sigio_fd(fd);
+void deactivate_fd(int fd, int irqnum)
+{
+ os_del_epoll_fd(fd); /* ignore err, just do it */
}
EXPORT_SYMBOL(deactivate_fd);
@@ -265,17 +359,17 @@ EXPORT_SYMBOL(deactivate_fd);
*/
int deactivate_all_fds(void)
{
- struct irq_fd *irq;
+ struct irq_entry * irq_entry;
int err;
- for (irq = active_fds; irq != NULL; irq = irq->next) {
- err = os_clear_fd_async(irq->fd);
- if (err)
- return err;
+ for (irq_entry = active_fds; irq_entry != NULL; irq_entry =
irq_entry->next) {
+ os_del_epoll_fd(irq_entry->fd); /* ignore err, just do it */
+ err = os_clear_fd_async(irq_entry->fd);
+ if (err) {
+ printk(KERN_ERR "Clear FD async failed with %d", err);
+ }
}
- /* If there is a signal already queued, after unblocking ignore it */
- os_set_ioignore();
-
+ os_close_epoll();
return 0;
}
@@ -308,13 +402,13 @@ int um_request_irq(unsigned int irq, int fd, int type,
{
int err;
- if (fd != -1) {
+	err = request_irq(irq, handler, irqflags, devname, dev_id);
+
+	if ((!err) && (fd != -1)) {
 		err = activate_fd(irq, fd, type, dev_id);
-		if (err)
-			return err;
+		if (err)
+			free_irq(irq, dev_id);
 	}
-	return request_irq(irq, handler, irqflags, devname, dev_id);
+	return err;
}
EXPORT_SYMBOL(um_request_irq);
@@ -352,9 +446,9 @@ void __init init_IRQ(void)
int i;
irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type,
handle_edge_irq);
-
- for (i = 1; i < NR_IRQS; i++)
+ for (i = 1; i < NR_IRQS - 1 ; i++)
irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+ os_setup_epoll(MAX_EPOLL_EVENTS);
}
/*
@@ -382,11 +476,11 @@ void __init init_IRQ(void)
* thread_info.
*
* There are three cases -
- * The first interrupt on the stack - sets up the thread_info and
+ * The first interrupt on the stack - sets up the thread_info and
* handles the interrupt
- * A nested interrupt interrupting the copying of the thread_info -
+ * A nested interrupt interrupting the copying of the thread_info -
* can't handle the interrupt, as the stack is in an unknown state
- * A nested interrupt not interrupting the copying of the
+ * A nested interrupt not interrupting the copying of the
* thread_info - doesn't do any setup, just handles the interrupt
*
* The first job is to figure out whether we interrupted stack setup.
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index b5e0cbb..4973f5c 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -16,7 +16,7 @@ static irqreturn_t sigio_interrupt(int irq, void *data)
char c;
os_read_file(sigio_irq_fd, &c, sizeof(c));
-	reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
return IRQ_HANDLED;
}
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 26e0164..2db18cb 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -264,6 +264,15 @@ int os_read_file(int fd, void *buf, int len)
return n;
}
+int os_pread_file(int fd, void *buf, int len, unsigned long long offset)
+{
+ int n = pread(fd, buf, len, offset);
+
+ if (n < 0)
+ return -errno;
+ return n;
+}
+
int os_write_file(int fd, const void *buf, int len)
{
int n = write(fd, (void *) buf, len);
@@ -282,6 +291,16 @@ int os_sync_file(int fd)
return n;
}
+int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset)
+{
+ int n = pwrite(fd, (void *) buf, len, offset);
+
+ if (n < 0)
+ return -errno;
+ return n;
+}
+
+
int os_file_size(const char *file, unsigned long long *size_out)
{
struct uml_stat buf;
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74..81b135a 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (C) 2012 - 2014 Cisco Systems
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
@@ -6,6 +7,7 @@
#include <stdlib.h>
#include <errno.h>
#include <poll.h>
+#include <sys/epoll.h>
#include <signal.h>
#include <string.h>
#include <irq_user.h>
@@ -16,120 +18,88 @@
* Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd
* and os_free_irq_by_cb, which are called under irq_lock.
*/
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
-int os_waiting_for_events(struct irq_fd *active_fds)
+/* epoll support */
+
+
+static int epollfd = -1;
+
+int os_setup_epoll(int maxevents) {
+ epollfd = epoll_create(maxevents);
+ return epollfd;
+}
+
+int os_waiting_for_events_epoll(void *kernel_events, int maxevents)
{
- struct irq_fd *irq_fd;
- int i, n, err;
+ int n, err;
- n = poll(pollfds, pollfds_num, 0);
+ n = epoll_wait(epollfd,
+ (struct epoll_event *) kernel_events, maxevents, 0);
if (n < 0) {
err = -errno;
if (errno != EINTR)
- printk(UM_KERN_ERR "os_waiting_for_events:"
- " poll returned %d, errno = %d\n", n, errno);
+			printk(
+				UM_KERN_ERR "os_waiting_for_events_epoll:"
+				" epoll_wait returned %d, error = %s\n", n,
+				strerror(errno)
+			);
return err;
}
- if (n == 0)
- return 0;
+ return n;
+}
- irq_fd = active_fds;
+int os_add_epoll_fd (int events, int fd, void * data) {
+ struct epoll_event event;
+ int result;
- for (i = 0; i < pollfds_num; i++) {
- if (pollfds[i].revents != 0) {
- irq_fd->current_events = pollfds[i].revents;
- pollfds[i].fd = -1;
- }
- irq_fd = irq_fd->next;
+ event.data.ptr = data;
+ event.events = events | EPOLLET;
+ result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+ if ((result) && (errno == EEXIST)) {
+ result = os_mod_epoll_fd (events, fd, data);
}
- return n;
+ if (result) {
+ printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
+ }
+ return result;
}
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
- if (pollfds_num == pollfds_size) {
- if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
- /* return min size needed for new pollfds area */
- return (pollfds_size + 1) * sizeof(pollfds[0]);
- }
-
- if (pollfds != NULL) {
- memcpy(tmp_pfd, pollfds,
- sizeof(pollfds[0]) * pollfds_size);
- /* remove old pollfds */
- kfree(pollfds);
- }
- pollfds = tmp_pfd;
- pollfds_size++;
- } else
- kfree(tmp_pfd); /* remove not used tmp_pfd */
-
- pollfds[pollfds_num] = ((struct pollfd) { .fd = fd,
- .events = events,
- .revents = 0 });
- pollfds_num++;
-
- return 0;
+int os_mod_epoll_fd (int events, int fd, void * data) {
+ struct epoll_event event;
+ int result;
+ event.data.ptr = data;
+ event.events = events;
+ result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+ if (result) {
+ printk("epollctl mod err fd %d, %s\n", fd, strerror(errno));
+ }
+ return result;
}
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
- struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
-{
- struct irq_fd **prev;
- int i = 0;
-
- prev = &active_fds;
- while (*prev != NULL) {
- if ((*test)(*prev, arg)) {
- struct irq_fd *old_fd = *prev;
- if ((pollfds[i].fd != -1) &&
- (pollfds[i].fd != (*prev)->fd)) {
- printk(UM_KERN_ERR "os_free_irq_by_cb - "
- "mismatch between active_fds and "
- "pollfds, fd %d vs %d\n",
- (*prev)->fd, pollfds[i].fd);
- goto out;
- }
-
- pollfds_num--;
-
- /*
- * This moves the *whole* array after pollfds[i]
- * (though it doesn't spot as such)!
- */
- memmove(&pollfds[i], &pollfds[i + 1],
- (pollfds_num - i) * sizeof(pollfds[0]));
- if (*last_irq_ptr2 == &old_fd->next)
- *last_irq_ptr2 = prev;
-
- *prev = (*prev)->next;
- if (old_fd->type == IRQ_WRITE)
- ignore_sigio_fd(old_fd->fd);
- kfree(old_fd);
- continue;
- }
- prev = &(*prev)->next;
- i++;
+int os_del_epoll_fd (int fd) {
+ struct epoll_event event;
+ int result;
+ result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
+ if (result) {
+ printk("epollctl del err %s\n", strerror(errno));
}
- out:
- return;
+ return result;
}
-int os_get_pollfd(int i)
+void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
+ struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
{
- return pollfds[i].fd;
+ printk("Someone invoking obsolete deactivate_by_CB!!!\n");
+ return;
}
-void os_set_pollfd(int i, int fd)
+void os_set_ioignore(void)
{
- pollfds[i].fd = fd;
+ signal(SIGIO, SIG_IGN);
}
-void os_set_ioignore(void)
+void os_close_epoll(void)
{
- signal(SIGIO, SIG_IGN);
+ os_close_file(epollfd);
}
--
2.1.4
------------------------------------------------------------------------------
_______________________________________________
User-mode-linux-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel