# HG changeset patch # User Ping Zhao <ping.z...@intel.com> # Date 1610554205 18000 # Wed Jan 13 11:10:05 2021 -0500 # Node ID 95886c3353dc80a3da215027c1e0f2141e47e911 # Parent b055bb6ef87e49232a7fcb4e5334b8efda3b6499 Add io_uring support in AIO(async io) module.
Hello, This is a patch to support io_uring in the AIO (async I/O) module. Basically, you don't need to change your configuration. If you're using a new kernel (above v5.1) which supports io_uring, and you have "aio on" in your configuration, nginx will use io_uring for FILE_AIO access, which achieves better performance than legacy libaio. Checked with iostat, which shows NVMe disk I/O has a 30%+ performance improvement with 1 thread. Tested using wrk with 100 threads and 200 connections (-t 100 -c 200) with 25000 random requests. iostat (B/s): libaio ~1.0 GB/s, io_uring 1.3+ GB/s diff -r b055bb6ef87e -r 95886c3353dc auto/unix --- a/auto/unix Mon Jan 11 22:06:27 2021 +0300 +++ b/auto/unix Wed Jan 13 11:10:05 2021 -0500 @@ -531,6 +531,30 @@ fi if [ $ngx_found = no ]; then + ngx_feature="Linux AIO support(IO_URING)" + ngx_feature_name="NGX_HAVE_FILE_AIO" + ngx_feature_incs="#include <liburing.h>" + ngx_feature_path= + ngx_feature_libs="-luring" + ngx_feature_test="struct io_uring ring; + struct io_uring_params params; + int ret; + memset(&params, 0, sizeof(params)); + ret = io_uring_queue_init_params(64, &ring, &params); + if (ret < 0) return 1; + if (!(params.features & IORING_FEAT_FAST_POLL)) return 1; + io_uring_queue_exit(&ring)" + . auto/feature + + if [ $ngx_found = yes ]; then + have=NGX_HAVE_EVENTFD . auto/have + have=NGX_HAVE_FILE_IOURING . 
auto/have + CORE_LIBS="$CORE_LIBS -luring" + CORE_SRCS="$CORE_SRCS $LINUX_AIO_SRCS" + fi + fi + + if [ $ngx_found = no ]; then ngx_feature="Linux AIO support" ngx_feature_name="NGX_HAVE_FILE_AIO" diff -r b055bb6ef87e -r 95886c3353dc src/core/ngx_output_chain.c --- a/src/core/ngx_output_chain.c Mon Jan 11 22:06:27 2021 +0300 +++ b/src/core/ngx_output_chain.c Wed Jan 13 11:10:05 2021 -0500 @@ -589,6 +589,20 @@ if (ctx->aio_handler) { n = ngx_file_aio_read(src->file, dst->pos, (size_t) size, src->file_pos, ctx->pool); +#if (NGX_HAVE_FILE_IOURING) + if (n > 0 && n < size) { + ngx_log_error(NGX_LOG_INFO, ctx->pool->log, 0, + ngx_read_file_n " Try again, only read %z of %O from \"%s\"", + n, size, src->file->name.data); + + src->file_pos += n; + dst->last += n; + + n = ngx_file_aio_read(src->file, dst->pos+n, (size_t) size-n, + src->file_pos, ctx->pool); + + } +#endif if (n == NGX_AGAIN) { ctx->aio_handler(ctx, src->file); return NGX_AGAIN; diff -r b055bb6ef87e -r 95886c3353dc src/event/modules/ngx_epoll_module.c --- a/src/event/modules/ngx_epoll_module.c Mon Jan 11 22:06:27 2021 +0300 +++ b/src/event/modules/ngx_epoll_module.c Wed Jan 13 11:10:05 2021 -0500 @@ -9,6 +9,9 @@ #include <ngx_core.h> #include <ngx_event.h> +#if (NGX_HAVE_FILE_IOURING) +#include <liburing.h> +#endif #if (NGX_TEST_BUILD_EPOLL) @@ -77,6 +80,9 @@ #if (NGX_HAVE_FILE_AIO) +#if (NGX_HAVE_FILE_IOURING) +#else + #define SYS_io_setup 245 #define SYS_io_destroy 246 #define SYS_io_getevents 247 @@ -89,9 +95,9 @@ int64_t res; /* result code for this event */ int64_t res2; /* secondary result */ }; - +#endif /* NGX_HAVE_FILE_IOURING */ +#endif /* NGX_HAVE_FILE_AIO */ -#endif #endif /* NGX_TEST_BUILD_EPOLL */ @@ -124,8 +130,25 @@ ngx_uint_t flags); #if (NGX_HAVE_FILE_AIO) +#if (NGX_HAVE_FILE_IOURING) +static void ngx_epoll_io_uring_handler(ngx_event_t *ev); + +struct io_uring ngx_ring; +struct io_uring_params ngx_ring_params; + +static ngx_event_t ngx_ring_event; +static ngx_connection_t ngx_ring_conn; + 
+#else static void ngx_epoll_eventfd_handler(ngx_event_t *ev); -#endif + +int ngx_eventfd = -1; +aio_context_t ngx_aio_ctx = 0; + +static ngx_event_t ngx_eventfd_event; +static ngx_connection_t ngx_eventfd_conn; +#endif /* NGX_HAVE_FILE_IOURING */ +#endif /* NGX_HAVE_FILE_AIO */ static void *ngx_epoll_create_conf(ngx_cycle_t *cycle); static char *ngx_epoll_init_conf(ngx_cycle_t *cycle, void *conf); @@ -140,16 +163,6 @@ static ngx_connection_t notify_conn; #endif -#if (NGX_HAVE_FILE_AIO) - -int ngx_eventfd = -1; -aio_context_t ngx_aio_ctx = 0; - -static ngx_event_t ngx_eventfd_event; -static ngx_connection_t ngx_eventfd_conn; - -#endif - #if (NGX_HAVE_EPOLLRDHUP) ngx_uint_t ngx_use_epoll_rdhup; #endif @@ -217,6 +230,47 @@ #if (NGX_HAVE_FILE_AIO) +#if (NGX_HAVE_FILE_IOURING) + +static void +ngx_epoll_aio_init(ngx_cycle_t *cycle, ngx_epoll_conf_t *epcf) +{ + struct epoll_event ee; + + if (io_uring_queue_init_params(32763, &ngx_ring, &ngx_ring_params) < 0) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "io_uring_queue_init_params() failed"); + goto failed; + } + + ngx_ring_event.data = &ngx_ring_conn; + ngx_ring_event.handler = ngx_epoll_io_uring_handler; + ngx_ring_event.log = cycle->log; + ngx_ring_event.active = 1; + ngx_ring_conn.fd = ngx_ring.ring_fd; + ngx_ring_conn.read = &ngx_ring_event; + ngx_ring_conn.log = cycle->log; + + ee.events = EPOLLIN|EPOLLET; + ee.data.ptr = &ngx_ring_conn; + + if (epoll_ctl(ep, EPOLL_CTL_ADD, ngx_ring.ring_fd, &ee) != -1) { + return; + } + + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "epoll_ctl(EPOLL_CTL_ADD, eventfd) failed"); + + io_uring_queue_exit(&ngx_ring); + +failed: + + ngx_ring.ring_fd = 0; + ngx_file_aio = 0; +} + +#else + /* * We call io_setup(), io_destroy() io_submit(), and io_getevents() directly * as syscalls instead of libaio usage, because the library header file @@ -316,8 +370,8 @@ ngx_file_aio = 0; } -#endif - +#endif /*NGX_HAVE_FILE_IOURING*/ +#endif /*NGX_HAVE_FILE_AIO*/ static ngx_int_t 
ngx_epoll_init(ngx_cycle_t *cycle, ngx_msec_t timer) @@ -548,6 +602,13 @@ #endif #if (NGX_HAVE_FILE_AIO) +#if (NGX_HAVE_FILE_IOURING) + if (ngx_ring.ring_fd != 0) { + io_uring_queue_exit(&ngx_ring); + ngx_ring.ring_fd = 0; + } + +#else if (ngx_eventfd != -1) { @@ -566,7 +627,8 @@ ngx_aio_ctx = 0; -#endif +#endif /*NGX_HAVE_FILE_IOURING*/ +#endif /*NGX_HAVE_FILE_AIO*/ ngx_free(event_list); @@ -935,8 +997,42 @@ return NGX_OK; } +#if (NGX_HAVE_FILE_AIO) +#if (NGX_HAVE_FILE_IOURING) +static void +ngx_epoll_io_uring_handler(ngx_event_t *ev) +{ + ngx_event_t *e; + struct io_uring_cqe *cqe; + unsigned head; + unsigned cqe_count = 0; + ngx_event_aio_t *aio; -#if (NGX_HAVE_FILE_AIO) + ngx_log_debug(NGX_LOG_DEBUG_EVENT, ev->log, 0, + "io_uring_peek_cqe: START"); + + io_uring_for_each_cqe(&ngx_ring, head, cqe) { + ngx_log_debug3(NGX_LOG_DEBUG_EVENT, ev->log, 0, + "io_event: %p %d %d", + cqe->user_data, cqe->res, cqe->flags); + + e = (ngx_event_t *) io_uring_cqe_get_data(cqe); + e->complete = 1; + e->active = 0; + e->ready = 1; + + aio = e->data; + aio->res = cqe->res; + + ++cqe_count; + + ngx_post_event(e, &ngx_posted_events); + } + + io_uring_cq_advance(&ngx_ring, cqe_count); +} + +#else static void ngx_epoll_eventfd_handler(ngx_event_t *ev) @@ -1019,8 +1115,8 @@ } } -#endif - +#endif /*NGX_HAVE_FILE_IOURING*/ +#endif /*NGX_HAVE_FILE_AIO*/ static void * ngx_epoll_create_conf(ngx_cycle_t *cycle) diff -r b055bb6ef87e -r 95886c3353dc src/event/ngx_event.h --- a/src/event/ngx_event.h Mon Jan 11 22:06:27 2021 +0300 +++ b/src/event/ngx_event.h Wed Jan 13 11:10:05 2021 -0500 @@ -160,7 +160,11 @@ size_t nbytes; #endif +#if (NGX_HAVE_FILE_IOURING) + struct iovec iov; +#else ngx_aiocb_t aiocb; +#endif ngx_event_t event; }; diff -r b055bb6ef87e -r 95886c3353dc src/os/unix/ngx_linux_aio_read.c --- a/src/os/unix/ngx_linux_aio_read.c Mon Jan 11 22:06:27 2021 +0300 +++ b/src/os/unix/ngx_linux_aio_read.c Wed Jan 13 11:10:05 2021 -0500 @@ -9,20 +9,24 @@ #include <ngx_core.h> #include 
<ngx_event.h> +#if (NGX_HAVE_FILE_IOURING) +#include <liburing.h> +extern struct io_uring ngx_ring; +extern struct io_uring_params ngx_ring_params; + +#else extern int ngx_eventfd; extern aio_context_t ngx_aio_ctx; - -static void ngx_file_aio_event_handler(ngx_event_t *ev); - - static int io_submit(aio_context_t ctx, long n, struct iocb **paiocb) { return syscall(SYS_io_submit, ctx, n, paiocb); } +#endif +static void ngx_file_aio_event_handler(ngx_event_t *ev); ngx_int_t ngx_file_aio_init(ngx_file_t *file, ngx_pool_t *pool) @@ -45,7 +49,114 @@ return NGX_OK; } +#if (NGX_HAVE_FILE_IOURING) +ssize_t +ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset, + ngx_pool_t *pool) +{ + ngx_err_t err; + ngx_event_t *ev; + ngx_event_aio_t *aio; + struct io_uring_sqe *sqe; + if (!ngx_file_aio) { + return ngx_read_file(file, buf, size, offset); + } + + if (file->aio == NULL && ngx_file_aio_init(file, pool) != NGX_OK) { + return NGX_ERROR; + } + + aio = file->aio; + ev = &aio->event; + + if (!ev->ready) { + ngx_log_error(NGX_LOG_ALERT, file->log, 0, + "second aio post for \"%V\"", &file->name); + return NGX_AGAIN; + } + + ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0, + "aio complete:%d @%O:%uz %V", + ev->complete, offset, size, &file->name); + + if (ev->complete) { + ev->active = 0; + ev->complete = 0; + + if (aio->res >= 0) { + ngx_set_errno(0); + return aio->res; + } + + ngx_set_errno(-aio->res); + + ngx_log_error(NGX_LOG_CRIT, file->log, ngx_errno, + "aio read \"%s\" failed", file->name.data); + + return NGX_ERROR; + } + + sqe = io_uring_get_sqe(&ngx_ring); + + if (!sqe) { + ngx_log_debug4(NGX_LOG_DEBUG_CORE, file->log, 0, + "aio no sqe left:%d @%O:%uz %V", + ev->complete, offset, size, &file->name); + return ngx_read_file(file, buf, size, offset); + } + + if (__builtin_expect(!!(ngx_ring_params.features & IORING_FEAT_CUR_PERSONALITY), 1)) { + /* + * `io_uring_prep_read` is faster than `io_uring_prep_readv`, because the kernel + * doesn't need to 
import iovecs in advance. + * + * If the kernel supports `IORING_FEAT_CUR_PERSONALITY`, it should support + * non-vectored read/write commands too. + * + * It's not perfect, but avoids an extra feature-test syscall. + */ + io_uring_prep_read(sqe, file->fd, buf, size, offset); + } else { + /* + * We must store iov into heap to prevent kernel from returning -EFAULT + * in case `IORING_FEAT_SUBMIT_STABLE` is not supported + */ + aio->iov.iov_base = buf; + aio->iov.iov_len = size; + io_uring_prep_readv(sqe, file->fd, &aio->iov, 1, offset); + } + io_uring_sqe_set_data(sqe, ev); + + + ev->handler = ngx_file_aio_event_handler; + + if (io_uring_submit(&ngx_ring) == 1) { + ev->active = 1; + ev->ready = 0; + ev->complete = 0; + + return NGX_AGAIN; + } + + err = ngx_errno; + + if (err == NGX_EAGAIN) { + return ngx_read_file(file, buf, size, offset); + } + + ngx_log_error(NGX_LOG_CRIT, file->log, err, + "io_submit(\"%V\") failed", &file->name); + + if (err == NGX_ENOSYS) { + ngx_file_aio = 0; + return ngx_read_file(file, buf, size, offset); + } + + return NGX_ERROR; +} + +#else ssize_t ngx_file_aio_read(ngx_file_t *file, u_char *buf, size_t size, off_t offset, ngx_pool_t *pool) @@ -132,7 +243,7 @@ return NGX_ERROR; } - +#endif static void ngx_file_aio_event_handler(ngx_event_t *ev) diff -r b055bb6ef87e -r 95886c3353dc src/os/unix/ngx_linux_config.h --- a/src/os/unix/ngx_linux_config.h Mon Jan 11 22:06:27 2021 +0300 +++ b/src/os/unix/ngx_linux_config.h Wed Jan 13 11:10:05 2021 -0500 @@ -93,11 +93,15 @@ #include <sys/eventfd.h> #endif #include <sys/syscall.h> + #if (NGX_HAVE_FILE_AIO) +#if (NGX_HAVE_FILE_IOURING) + +#else #include <linux/aio_abi.h> typedef struct iocb ngx_aiocb_t; #endif - +#endif #if (NGX_HAVE_CAPABILITIES) #include <linux/capability.h> _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org http://mailman.nginx.org/mailman/listinfo/nginx-devel