Hi All,
I've hacked together a patch to Event-1.06 to add epoll support on
Linux. It's enabled by adding -DHAS_EPOLL to the compiler flags. I've
turned on that switch by default in the attached patch. I've tested it
on Fedora Core 3 with kernel 2.6.11-1.14_FC3.
I thought about using libevent, but it turned out to be too hard to
intermingle the event loops.
I'm interested in any feedback that anyone has, especially any follow-up
assistance with properly detecting whether epoll is available on the
system and enabling it by default only in that case. Do you think this
code is suitable for inclusion in the main module?
Here is a benchmark run with the epoll code enabled on a 1 GHz Intel
Pentium 3:
benchmark: 0.08
Event: 1.06
perl 5.008007
uname=Linux synthesis.labs.infogears.com 2.6.11-1.14_FC3 #1 Thu Apr 7 19:23:49
EDT 2005 i686 i686 i386 GNU/Linux
cc='cc', optimize='-O2'
ccflags='-fno-strict-aliasing -pipe -I/usr/local/include -D_LARGEFILE_SOURCE
-D_FILE_OFFSET_BITS=64 -I/usr/include/gdbm'
Elapse Time: 99.94% of 11 seconds
Timer/sec: 98.43% (803 total)
Io/sec: 55665.822 (611936 total)
Signals/sec 1.91
Events/sec 59219.892
Null/sec 547318
Event/Null 10.82%
Thanks,
Rusty
--
Rusty Conover
InfoGears Inc.
http://www.infogears.com
406-587-5432
diff -ur Event-1.06/c/unix.c /home/rconover/Event-1.06/c/unix.c
--- Event-1.06/c/unix.c 2005-09-11 14:10:38.000000000 -0600
+++ /home/rconover/Event-1.06/c/unix.c 2005-09-11 19:38:48.000000000 -0600
@@ -1,3 +1,208 @@
+#if defined(HAS_EPOLL)
+
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/queue.h>
+#include <sys/epoll.h>
+
+/*
+ * Book-keeping for the epoll back end.  The single instance declared
+ * below is filled in by boot_epoll() and then used by pe_sys_io_add(),
+ * pe_sys_io_del() and pe_sys_multiplex().
+ */
+struct epollop {
+    pe_io **fds;                /* watcher per descriptor, indexed by fd */
+    int nfds;                   /* number of slots allocated in fds */
+    struct epoll_event *events; /* array handed to epoll_wait() */
+    int nevents;                /* number of entries allocated in events */
+    int epfd;                   /* descriptor returned by epoll_create() */
+};
+
+/*
+ * File-local, like dpfd in the /dev/poll back end: every function that
+ * touches it is itself static in this translation unit, so there is no
+ * reason to export the symbol.
+ */
+static struct epollop epollop;
+
+
+#endif /* HAS_EPOLL */
+
+/*
+ * Deliver an I/O notification to a watcher.  'got' carries the PE_* bits
+ * that fired; only the bits the watcher asked for (wa->poll) survive.
+ * When at least one survives, a new event is obtained from the watcher's
+ * vtbl, its hit count is bumped, the bits are recorded and the event is
+ * queued.  When none survive nothing is queued, and a watcher whose debug
+ * level is 3 or more gets a warning instead.
+ */
+static void _queue_io(pe_io *wa, int got) {
+    int wanted = got & wa->poll;
+
+    if (wanted) {
+        pe_ioevent *ioev =
+            (pe_ioevent*) (*wa->base.vtbl->new_event)((pe_watcher*) wa);
+        ++ioev->base.hits;
+        ioev->got |= wanted;
+        queueEvent((pe_event*) ioev);
+        return;
+    }
+
+    if (WaDEBUGx(wa) >= 3) {
+        STRLEN n_a;
+        warn("Event: io '%s' queued nothing", SvPV(wa->base.desc, n_a));
+    }
+}
+
+/*
+ * One-time setup for the epoll back end; called from the boot section of
+ * Event.xs.  Creates the epoll descriptor and allocates the two arrays in
+ * 'epollop' with one slot per descriptor number the process is currently
+ * allowed to open, so that a descriptor can be used directly as an index.
+ *
+ * Croaks when the descriptor limit cannot be read or when epoll_create()
+ * fails.  Compiles to an empty function unless HAS_EPOLL is defined.
+ */
+static void boot_epoll(void) {
+#if defined(HAS_EPOLL)
+    /* Slots to allocate when the limit is reported as unlimited.  The
+       previous value, 64*1024*1024, asked for hundreds of megabytes up
+       front.  1024*1024 is the kernel's traditional per-process ceiling
+       (NR_OPEN).  NOTE(review): confirm this is enough on hosts that
+       raise fs.nr_open. */
+    const int unlimited_nfiles = 1024*1024;
+    struct rlimit rl;
+    int epfd, nfiles;
+
+    /* Get the total number of descriptors able to be opened in this
+       process and allocate enough slots to handle all of them */
+    if (getrlimit(RLIMIT_NOFILE, &rl) != 0) {
+        croak("Failed to get opened file handle limit");
+    }
+    if (rl.rlim_cur != RLIM_INFINITY) {
+        nfiles = rl.rlim_cur;
+    } else {
+        nfiles = unlimited_nfiles;
+    }
+
+    /* Initialize the kernel queue.  The argument is only a size hint, so
+       don't size to the maximum here: the kernel resizes automatically. */
+    if ((epfd = epoll_create(100)) == -1) {
+        croak("Failed to create epoll with %d files", nfiles);
+    }
+
+    /* Keep the epoll descriptor from leaking into exec'd children.  A
+       failure here is not fatal to event delivery, so only warn. */
+    if (fcntl(epfd, F_SETFD, FD_CLOEXEC) == -1) {
+        warn("Event: cannot set close-on-exec on epoll fd (errno %d)", errno);
+    }
+
+    epollop.epfd = epfd;
+
+    /* Allocate the arrays and clear them.  The registration code may
+       read a slot before anything has been stored in it (for example the
+       "is this fd already registered" test on fds[]), and EvNew is not
+       known to return zeroed memory. */
+    EvNew(9, epollop.events, nfiles, struct epoll_event);
+    Zero(epollop.events, nfiles, struct epoll_event);
+    epollop.nevents = nfiles;
+
+    EvNew(9, epollop.fds, nfiles, pe_io *);
+    Zero(epollop.fds, nfiles, pe_io *);
+    epollop.nfds = nfiles;
+
+#endif /*HAS_EPOLL*/
+}
+
+
+
+
+
+#if defined(HAS_EPOLL)
+
+/*
+ * Block for roughly 'left' seconds.  A plain poll() with an empty
+ * descriptor set serves as the timer, so epoll is not involved.  If
+ * poll() comes back while more than IntervalEpsilon seconds remain, the
+ * wait is resumed for the remainder; TimeoutTooEarly counts the times
+ * poll() reported a timeout (returned 0) although time was still left.
+ * Croaks on any poll() failure other than EAGAIN or EINTR.
+ */
+static void pe_sys_sleep(double left) {
+    double deadline = NVtime() + left;
+
+    for (;;) {
+        /* hope zeroes okay */
+        int rc = poll(0, 0, (int) (left * 1000));
+
+        if (rc < 0 && !(errno == EAGAIN || errno == EINTR))
+            croak("poll(%.2f) got errno %d", left, errno);
+
+        left = deadline - NVtime();
+        if (!(left > IntervalEpsilon))
+            break;
+        if (rc == 0)
+            ++TimeoutTooEarly;
+    }
+}
+
+/*
+ * Register watcher 'ev' with the epoll descriptor, or replace the
+ * registration if the kernel already knows ev->fd.
+ *
+ * The interest mask is derived from ev->poll on every call:
+ *   PE_E -> EPOLLPRI
+ *   PE_R -> EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP
+ *   PE_W -> EPOLLOUT | EPOLLERR
+ * It is built in a local struct rather than OR-ed into
+ * epollop.events[fd]: that array is also the buffer epoll_wait() writes
+ * its results into and is never cleared when a watcher is removed, so
+ * bits kept there would leak into later registrations.
+ *
+ * Both the kernel's data.ptr and epollop.fds[] hold one watcher per
+ * descriptor, so a later registration on the same fd replaces an
+ * earlier one.
+ *
+ * Croaks on a negative or out-of-range fd and on unexpected epoll_ctl()
+ * failures.  If the descriptor turns out to be closed (fstat reports
+ * EBADF) it warns and resets the watcher's handle instead.
+ */
+static void pe_sys_io_add (pe_io *ev) {
+    struct epoll_event want;
+    struct stat buf;
+    STRLEN n_a;
+    int rc, saved_errno;
+
+    /* 0 is a legitimate descriptor (stdin); pe_sys_io_del accepts it too */
+    if (ev->fd < 0) {
+        croak("pe_sys_io_add: non-valid fd (%d)", ev->fd);
+    }
+    /* fds[] has exactly nfds slots; never index past it */
+    if (ev->fd >= epollop.nfds) {
+        croak("pe_sys_io_add: fd (%d) is beyond the epoll table size (%d)",
+              ev->fd, epollop.nfds);
+    }
+
+    Zero(&want, 1, struct epoll_event);
+    want.data.ptr = ev;
+    if (ev->poll & PE_E) {
+        want.events |= EPOLLPRI;
+    }
+    if (ev->poll & PE_R) {
+        want.events |= EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP;
+    }
+    if (ev->poll & PE_W) {
+        want.events |= EPOLLOUT | EPOLLERR;
+    }
+
+    /* Let the kernel say whether the descriptor is already registered
+       instead of trusting our own table: EEXIST means modify it. */
+    rc = epoll_ctl(epollop.epfd, EPOLL_CTL_ADD, ev->fd, &want);
+    if (rc == -1 && errno == EEXIST) {
+        rc = epoll_ctl(epollop.epfd, EPOLL_CTL_MOD, ev->fd, &want);
+    }
+
+    if (rc == -1) {
+        saved_errno = errno;    /* fstat below overwrites errno */
+
+        /* Check if its a bad descriptor */
+        if (PerlLIO_fstat(ev->fd, &buf) < 0 && errno == EBADF) {
+            warn("Event: '%s' was unexpectedly closed",
+                 SvPV(ev->base.desc, n_a));
+            pe_io_reset_handle((pe_watcher*) ev);
+            return;
+        }
+        croak("Failed to change the epoll settings for fd %d (errno %d)",
+              ev->fd, saved_errno);
+    }
+
+    /* Save the pointer to the current event */
+    epollop.fds[ev->fd] = ev;
+}
+
+/*
+ * Remove watcher 'ev' from the epoll set.
+ *
+ * Does nothing when ev->fd is negative or outside epollop.fds[], or
+ * when the slot for that descriptor belongs to a different watcher (the
+ * table keeps one watcher per fd; removing the descriptor on behalf of
+ * a watcher that no longer owns it would silence the one that does).
+ *
+ * ENOENT and EBADF from epoll_ctl() are tolerated: closing a descriptor
+ * removes it from the epoll set, so by the time a watcher is torn down
+ * the kernel may no longer know the fd.  Any other failure croaks.
+ */
+static void pe_sys_io_del (pe_io *ev) {
+    struct epoll_event unused;
+
+    if (ev->fd < 0 || ev->fd >= epollop.nfds) {
+        return;
+    }
+    if (epollop.fds[ev->fd] != ev) {
+        return;
+    }
+
+    /* The event argument is ignored for EPOLL_CTL_DEL, but kernels
+       before 2.6.9 insist on a non-NULL pointer.  Use a scratch struct
+       rather than a slot of the epoll_wait() result buffer. */
+    Zero(&unused, 1, struct epoll_event);
+
+    if (epoll_ctl(epollop.epfd, EPOLL_CTL_DEL, ev->fd, &unused) == -1
+        && errno != ENOENT && errno != EBADF)
+        croak("Failed to delete from epoll");
+
+    epollop.fds[ev->fd] = 0;
+}
+
+
+/*
+ * Wait up to 'timeout' seconds for activity on the registered
+ * descriptors and queue an event for every watcher that became ready.
+ * A timeout of zero or less polls without blocking.
+ *
+ * Readiness is translated to PE_* bits as the mirror image of what
+ * pe_sys_io_add subscribes to:
+ *   EPOLLIN            -> PE_R
+ *   EPOLLOUT           -> PE_W
+ *   EPOLLPRI           -> PE_R | PE_E
+ *   EPOLLHUP, EPOLLERR -> PE_R | PE_W  (wake whichever side is watched)
+ * The bits for one descriptor are combined and handed to _queue_io()
+ * in a single call; _queue_io() masks them with the watcher's own
+ * interest set.
+ *
+ * An interrupted wait (EINTR/EAGAIN) returns quietly; any other
+ * epoll_wait() failure is reported with warn() before returning.
+ */
+static void pe_sys_multiplex(double timeout) {
+    /* Largest millisecond count an int can carry (INT_MAX on Linux) */
+    const int max_wait_ms = 2147483647;
+    int i, res, real_timeout;
+
+    if (timeout <= 0) {
+        real_timeout = 0;
+    } else if (timeout * 1000 >= (double) max_wait_ms) {
+        /* converting a larger value to int would overflow */
+        real_timeout = max_wait_ms;
+    } else {
+        real_timeout = (int) (timeout * 1000);
+    }
+
+    res = epoll_wait(epollop.epfd, epollop.events, epollop.nevents,
+                     real_timeout);
+
+    if (res == -1) {
+        if (errno != EINTR && errno != EAGAIN) {
+            warn("epoll_wait got errno %d", errno);
+        }
+        return;
+    }
+
+    for (i = 0; i < res; i++) {
+        int what = epollop.events[i].events;
+        pe_io *this_watcher = (pe_io *) epollop.events[i].data.ptr;
+        int got = 0;
+
+        if (!this_watcher) {
+            continue;
+        }
+
+        /* In the event of an error signal either the read or the write
+           callback */
+        if (what & (EPOLLHUP | EPOLLERR)) {
+            got |= PE_R | PE_W;
+        }
+        if (what & EPOLLIN) {
+            got |= PE_R;
+        }
+        if (what & EPOLLOUT) {
+            got |= PE_W;
+        }
+        /* EPOLLPRI is requested for both PE_R and PE_E watchers; it must
+           be delivered, or the level-triggered condition is never
+           consumed and epoll_wait() returns immediately forever. */
+        if (what & EPOLLPRI) {
+            got |= PE_R | PE_E;
+        }
+
+        if (got) {
+            _queue_io(this_watcher, got);
+        }
+    }
+}
+
+#endif /* HAS_EPOLL */
+
+
+/* ***************************** EPOLL */
+
+
+
#if defined(HAS_DEVPOLL)
#include <sys/devpoll.h>
static int dpfd=0;
@@ -53,24 +258,9 @@
return -1;
}
-static void _queue_io(pe_io *wa, int got) {
- pe_ioevent *ev;
- got &= wa->poll;
- if (!got) {
- if (WaDEBUGx(wa) >= 3) {
- STRLEN n_a;
- warn("Event: io '%s' queued nothing", SvPV(wa->base.desc, n_a));
- }
- return;
- }
- ev = (pe_ioevent*) (*wa->base.vtbl->new_event)((pe_watcher*) wa);
- ++ev->base.hits;
- ev->got |= got;
- queueEvent((pe_event*) ev);
-}
/************************************************* DEVPOLL */
-#if defined(HAS_DEVPOLL) && !PE_SYS_IO
+#if defined(HAS_DEVPOLL) && !PE_SYS_IO && !defined(HAS_EPOLL)
#define PE_SYS_IO 1
static void pe_sys_sleep(double left) {
@@ -229,7 +419,7 @@
#endif /*HAS_DEVPOLL*/
/************************************************* POLL */
-#if defined(HAS_POLL) && !PE_SYS_IO
+#if defined(HAS_POLL) && !PE_SYS_IO && !defined(HAS_EPOLL)
#define PE_SYS_IO 1
static struct pollfd *Pollfd=0;
@@ -346,7 +536,7 @@
/************************************************* SELECT */
-#if defined(HAS_SELECT) && !PE_SYS_IO
+#if defined(HAS_SELECT) && !PE_SYS_IO && !defined(HAS_EPOLL)
#define PE_SYS_IO 1
static int Nfds;
diff -ur Event-1.06/ChangeLog /home/rconover/Event-1.06/ChangeLog
--- Event-1.06/ChangeLog 2005-09-11 14:10:38.000000000 -0600
+++ /home/rconover/Event-1.06/ChangeLog 2005-09-11 19:44:13.000000000 -0600
@@ -1,3 +1,14 @@
+2005-09-11 <[EMAIL PROTECTED]>
+
+ * Add epoll support on linux, enabled with -DHAS_EPOLL
+
+ * Change the t/io.t test to add the warning handler before the fd
+ is added to the Event object, because with epoll the EBADF error
+ will be reported when the fd is added to the epoll fd.
+
+ * Change Event.xs to call boot_epoll() to initialize the epoll fd,
+ so it can be used later on.
+
2005-05-14 <[EMAIL PROTECTED]>
* Release 1.06.
Only in /home/rconover/Event-1.06: ChangeLog~
diff -ur Event-1.06/Event.xs /home/rconover/Event-1.06/Event.xs
--- Event-1.06/Event.xs 2005-09-11 14:10:38.000000000 -0600
+++ /home/rconover/Event-1.06/Event.xs 2005-09-11 19:27:27.000000000 -0600
@@ -296,6 +296,7 @@
boot_idle();
boot_timer();
boot_io();
+ boot_epoll();
boot_devpoll();
boot_var();
boot_tied();
diff -ur Event-1.06/Makefile.PL /home/rconover/Event-1.06/Makefile.PL
--- Event-1.06/Makefile.PL 2005-09-11 14:10:38.000000000 -0600
+++ /home/rconover/Event-1.06/Makefile.PL 2005-09-11 19:27:01.000000000 -0600
@@ -11,6 +11,7 @@
my @opt=(PREREQ_PM => { Test => 1 },
VERSION_FROM => "./lib/Event.pm",
NAME => "Event",
+ DEFINE => '-DHAS_EPOLL',
TYPEMAPS => ['./lib/Event/typemap'],
INC => '-Ic -Ilib/Event',
H => [glob("c/*"), glob("*.h"), "lib/Event/EventAPI.h"],
Only in /home/rconover/Event-1.06/t: foo.run
diff -ur Event-1.06/t/io.t /home/rconover/Event-1.06/t/io.t
--- Event-1.06/t/io.t 2005-09-11 19:47:44.000000000 -0600
+++ /home/rconover/Event-1.06/t/io.t 2005-09-11 15:49:43.000000000 -0600
@@ -23,12 +23,6 @@
my $noticed_bogus_fd=0;
my $bogus_timeout=0;
-my $bogus = Event->io(desc => 'oops', poll => 'r', fd => 123,
- timeout => .1, cb => sub {
- ++$bogus_timeout;
- });
-
-print "bogus: $bogus\n";
$SIG{__WARN__} = sub {
my $is_it = $_[0] =~ m/\'oops\' was unexpectedly/;
@@ -39,6 +33,12 @@
}
};
+
+my $bogus = Event->io(desc => 'oops', poll => 'r', fd => 123,
+ timeout => .1, cb => sub {
+ ++$bogus_timeout;
+ });
+
sub new_pipe {
my ($cnt) = @_;
my ($r,$w) = (gensym, gensym);