I re-read some performance concerns about the file storage and remembered
that I said I'd send a patch, probably a year ago. So here we go.

The only thing it does is expose the madvise parameter to the user. In
practice, it helps performance for big objects and/or rotational disks when
using MADV_SEQUENTIAL on Linux.

-- 
Guillaume Quintard
From da65af38bf016df6d4d5661e9b96a98556592a5e Mon Sep 17 00:00:00 2001
From: Guillaume Quintard <[email protected]>
Date: Wed, 15 Jun 2016 14:17:31 +0200
Subject: [PATCH] Tunable madvise for file storage

---
 bin/varnishd/mgt/mgt_main.c                 |  2 ++
 bin/varnishd/storage/storage_file.c         | 18 +++++++++++++++---
 doc/sphinx/users-guide/storage-backends.rst |  9 ++++++++-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/bin/varnishd/mgt/mgt_main.c b/bin/varnishd/mgt/mgt_main.c
index e189e61..8fa6c2c 100644
--- a/bin/varnishd/mgt/mgt_main.c
+++ b/bin/varnishd/mgt/mgt_main.c
@@ -238,6 +238,8 @@ usage(void)
 	fprintf(stderr, FMT, "", "  -s file,<dir_or_file>,<size>");
 	fprintf(stderr, FMT, "",
 	    "  -s file,<dir_or_file>,<size>,<granularity>");
+	fprintf(stderr, FMT, "",
+	    "  -s file,<dir_or_file>,<size>,<granularity>,<advice>");
 	fprintf(stderr, FMT, "", "  -s persistent (experimental)");
 	fprintf(stderr, FMT, "-T address:port",
 	    "Telnet listen address and port");
diff --git a/bin/varnishd/storage/storage_file.c b/bin/varnishd/storage/storage_file.c
index dc15ddf..460a7d5 100644
--- a/bin/varnishd/storage/storage_file.c
+++ b/bin/varnishd/storage/storage_file.c
@@ -96,6 +96,7 @@ struct smf_sc {
 	int			fd;
 	unsigned		pagesize;
 	uintmax_t		filesize;
+	int			advice;
 	struct smfhead		order;
 	struct smfhead		free[NBUCKET];
 	struct smfhead		used;
@@ -110,13 +111,14 @@ smf_init(struct stevedore *parent, int ac, char * const *av)
 	struct smf_sc *sc;
 	unsigned u;
 	uintmax_t page_size;
+	int advice = MADV_RANDOM;
 
 	AZ(av[ac]);
 
 	size = NULL;
 	page_size = getpagesize();
 
-	if (ac > 3)
+	if (ac > 4)
 		ARGV_ERR("(-sfile) too many arguments\n");
 	if (ac < 1 || *av[0] == '\0')
 		ARGV_ERR("(-sfile) path is mandatory\n");
@@ -129,6 +131,16 @@ smf_init(struct stevedore *parent, int ac, char * const *av)
 		if (r != NULL)
 			ARGV_ERR("(-sfile) granularity \"%s\": %s\n", av[2], r);
 	}
+	if (ac > 3) {
+		if (!strcmp(av[3], "normal"))
+			advice = MADV_NORMAL;
+		else if (!strcmp(av[3], "random"))
+			advice = MADV_RANDOM;
+		else if (!strcmp(av[3], "sequential"))
+			advice = MADV_SEQUENTIAL;
+		else
+			ARGV_ERR("(-s file) invalid advice: \"%s\"");
+	}
 
 	AN(fn);
 
@@ -139,7 +151,7 @@ smf_init(struct stevedore *parent, int ac, char * const *av)
 		VTAILQ_INIT(&sc->free[u]);
 	VTAILQ_INIT(&sc->used);
 	sc->pagesize = page_size;
-
+	sc->advice = advice;
 	parent->priv = sc;
 
 	(void)STV_GetFile(fn, &sc->fd, &sc->filename, "-sfile");
@@ -366,7 +378,7 @@ smf_open_chunk(struct smf_sc *sc, off_t sz, off_t off, off_t *fail, off_t *sum)
 		p = mmap(NULL, sz, PROT_READ|PROT_WRITE,
 		    MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, off);
 		if (p != MAP_FAILED) {
-			(void) madvise(p, sz, MADV_RANDOM);
+			(void) madvise(p, sz, sc->advice);
 			(*sum) += sz;
 			new_smf(sc, p, off, sz);
 			return;
diff --git a/doc/sphinx/users-guide/storage-backends.rst b/doc/sphinx/users-guide/storage-backends.rst
index 9782313..287aeac 100644
--- a/doc/sphinx/users-guide/storage-backends.rst
+++ b/doc/sphinx/users-guide/storage-backends.rst
@@ -48,7 +48,7 @@ depend on the operating systems ability to page effectively.
 file
 ~~~~
 
-syntax: file[,path[,size[,granularity]]]
+syntax: file[,path[,size[,granularity[,advice]]]]
 
 The file backend stores objects in memory backed by an unlinked file on disk
 with `mmap`.
@@ -94,6 +94,13 @@ have many small objects.
 File performance is typically limited to the write speed of the
 device, and depending on use, the seek time.
 
+'advice' dictates what Varnish tells the system in order to optimize reads.
+Depending on your OS, disks and object sizes, it can be beneficial to tweak
+this. The three possible values are "normal", "random" (default) and
+"sequential"; they correspond to MADV_NORMAL, MADV_RANDOM and
+MADV_SEQUENTIAL, respectively. For example, large objects on rotational
+disks should profit from "sequential" on Linux.
+
 persistent (experimental)
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
-- 
2.8.3

_______________________________________________
varnish-dev mailing list
[email protected]
https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev

Reply via email to