Module Name:    src
Committed By:   rillig
Date:           Mon Dec 28 15:21:33 UTC 2020

Modified Files:
        src/usr.bin/make: parse.c

Log Message:
make(1): remove mmap for loading files, only allow files < 1 GiB

Using mmap is beneficial if the loaded data is read-only, or if it is
accessed in random order.  Neither of these applies here.  When loading
a file, make reads it strictly from top to bottom, once.  During
parsing, the loaded data is modified in-place to insert '\0' and '\n'
for terminating strings and lines.  Because of all of this, there is no
benefit in using mmap.

Reading the file using 2 calls to read(2) (one for the data, one for
checking for EOF) loads the data in a single pass, instead of producing
a page fault whenever the parser passes another page boundary.

Use a Buffer for loading the file data, to avoid calling bmake_realloc
directly.

Do not resize the loaded buffer at the end.  Each loaded file is
short-lived anyway, and only a few files are loaded at the same time, so
there is no point in optimizing this part for low memory usage.


To generate a diff of this commit:
cvs rdiff -u -r1.521 -r1.522 src/usr.bin/make/parse.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/make/parse.c
diff -u src/usr.bin/make/parse.c:1.521 src/usr.bin/make/parse.c:1.522
--- src/usr.bin/make/parse.c:1.521	Mon Dec 28 00:46:24 2020
+++ src/usr.bin/make/parse.c	Mon Dec 28 15:21:33 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: parse.c,v 1.521 2020/12/28 00:46:24 rillig Exp $	*/
+/*	$NetBSD: parse.c,v 1.522 2020/12/28 15:21:33 rillig Exp $	*/
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -98,26 +98,18 @@
  */
 
 #include <sys/types.h>
-#include <sys/mman.h>
 #include <sys/stat.h>
 #include <errno.h>
 #include <stdarg.h>
 #include <stdint.h>
 
-#ifndef MAP_FILE
-#define MAP_FILE 0
-#endif
-#ifndef MAP_COPY
-#define MAP_COPY MAP_PRIVATE
-#endif
-
 #include "make.h"
 #include "dir.h"
 #include "job.h"
 #include "pathnames.h"
 
 /*	"@(#)parse.c	8.3 (Berkeley) 3/19/94"	*/
-MAKE_RCSID("$NetBSD: parse.c,v 1.521 2020/12/28 00:46:24 rillig Exp $");
+MAKE_RCSID("$NetBSD: parse.c,v 1.522 2020/12/28 15:21:33 rillig Exp $");
 
 /* types and constants */
 
@@ -354,21 +346,19 @@ struct loadedfile {
 	const char *path;	/* name, for error reports */
 	char *buf;		/* contents buffer */
 	size_t len;		/* length of contents */
-	size_t maplen;		/* length of mmap area, or 0 */
 	Boolean used;		/* XXX: have we used the data yet */
 };
 
 /* XXX: What is the lifetime of the path? Who manages the memory? */
 static struct loadedfile *
-loadedfile_create(const char *path)
+loadedfile_create(const char *path, char *buf, size_t buflen)
 {
 	struct loadedfile *lf;
 
 	lf = bmake_malloc(sizeof *lf);
 	lf->path = path == NULL ? "(stdin)" : path;
-	lf->buf = NULL;
-	lf->len = 0;
-	lf->maplen = 0;
+	lf->buf = buf;
+	lf->len = buflen;
 	lf->used = FALSE;
 	return lf;
 }
@@ -376,12 +366,7 @@ loadedfile_create(const char *path)
 static void
 loadedfile_destroy(struct loadedfile *lf)
 {
-	if (lf->buf != NULL) {
-		if (lf->maplen > 0)
-			munmap(lf->buf, lf->maplen);
-		else
-			free(lf->buf);
-	}
+	free(lf->buf);
 	free(lf);
 }
 
@@ -420,65 +405,18 @@ load_getsize(int fd, size_t *ret)
 	 * st_size is an off_t, which is 64 bits signed; *ret is
 	 * size_t, which might be 32 bits unsigned or 64 bits
 	 * unsigned. Rather than being elaborate, just punt on
-	 * files that are more than 2^31 bytes. We should never
-	 * see a makefile that size in practice...
+	 * files that are more than 1 GiB. We should never
+	 * see a makefile that size in practice.
 	 *
 	 * While we're at it reject negative sizes too, just in case.
 	 */
-	if (st.st_size < 0 || st.st_size > 0x7fffffff)
+	if (st.st_size < 0 || st.st_size > 0x3fffffff)
 		return FALSE;
 
 	*ret = (size_t)st.st_size;
 	return TRUE;
 }
 
-static Boolean
-loadedfile_mmap(struct loadedfile *lf, int fd)
-{
-	static unsigned long pagesize = 0;
-
-	if (!load_getsize(fd, &lf->len))
-		return FALSE;
-
-	/* found a size, try mmap */
-	if (pagesize == 0)
-		pagesize = (unsigned long)sysconf(_SC_PAGESIZE);
-	if (pagesize == 0 || pagesize == (unsigned long)-1)
-		pagesize = 0x1000;
-
-	/* round size up to a page */
-	lf->maplen = pagesize * ((lf->len + pagesize - 1) / pagesize);
-
-	/*
-	 * XXX hack for dealing with empty files; remove when
-	 * we're no longer limited by interfacing to the old
-	 * logic elsewhere in this file.
-	 */
-	if (lf->maplen == 0)
-		lf->maplen = pagesize;
-
-	/*
-	 * FUTURE: remove PROT_WRITE when the parser no longer
-	 * needs to scribble on the input.
-	 */
-	lf->buf = mmap(NULL, lf->maplen, PROT_READ | PROT_WRITE,
-	    MAP_FILE | MAP_COPY, fd, 0);
-	if (lf->buf == MAP_FAILED)
-		return FALSE;
-
-	if (lf->len > 0 && lf->buf[lf->len - 1] != '\n') {
-		if (lf->len == lf->maplen) {
-			char *b = bmake_malloc(lf->len + 1);
-			memcpy(b, lf->buf, lf->len);
-			munmap(lf->buf, lf->maplen);
-			lf->maplen = 0;
-		}
-		lf->buf[lf->len++] = '\n';
-	}
-
-	return TRUE;
-}
-
 /*
  * Read in a file.
  *
@@ -491,75 +429,63 @@ loadedfile_mmap(struct loadedfile *lf, i
 static struct loadedfile *
 loadfile(const char *path, int fd)
 {
-	struct loadedfile *lf;
-	ssize_t result;
-	size_t bufpos;
+	ssize_t n;
+	Buffer buf;
+	size_t filesize;
 
-	lf = loadedfile_create(path);
 
 	if (path == NULL) {
 		assert(fd == -1);
 		fd = STDIN_FILENO;
-	} else {
-#if 0 /* notyet */
-		fd = open(path, O_RDONLY);
-		if (fd < 0) {
-			...
-			Error("%s: %s", path, strerror(errno));
-			exit(1);
-		}
-#endif
 	}
 
-	if (loadedfile_mmap(lf, fd))
-		goto done;
-
-	/* cannot mmap; load the traditional way */
-
-	lf->maplen = 0;
-	lf->len = 1024;
-	lf->buf = bmake_malloc(lf->len);
+	if (load_getsize(fd, &filesize)) {
+		/*
+		 * Avoid resizing the buffer later for no reason.
+		 *
+		 * At the same time leave space for adding a final '\n',
+		 * just in case it is missing in the file.
+		 */
+		filesize++;
+	} else
+		filesize = 1024;
+	Buf_InitSize(&buf, filesize);
 
-	bufpos = 0;
 	for (;;) {
-		assert(bufpos <= lf->len);
-		if (bufpos == lf->len) {
-			if (lf->len > SIZE_MAX / 2) {
+		assert(buf.len <= buf.cap);
+		if (buf.len == buf.cap) {
+			if (buf.cap > 0x1fffffff) {
 				errno = EFBIG;
 				Error("%s: file too large", path);
 				exit(2); /* Not 1 so -q can distinguish error */
 			}
-			lf->len *= 2;
-			lf->buf = bmake_realloc(lf->buf, lf->len);
+			Buf_Expand_1(&buf);
 		}
-		assert(bufpos < lf->len);
-		result = read(fd, lf->buf + bufpos, lf->len - bufpos);
-		if (result < 0) {
+		assert(buf.len < buf.cap);
+		n = read(fd, buf.data + buf.len, buf.cap - buf.len);
+		if (n < 0) {
 			Error("%s: read error: %s", path, strerror(errno));
 			exit(2);	/* Not 1 so -q can distinguish error */
 		}
-		if (result == 0)
+		if (n == 0)
 			break;
 
-		bufpos += (size_t)result;
+		buf.len += (size_t)n;
 	}
-	assert(bufpos <= lf->len);
-	lf->len = bufpos;
+	assert(buf.len <= buf.cap);
 
-	/* truncate malloc region to actual length (maybe not useful) */
-	if (lf->len > 0) {
-		/* as for mmap case, ensure trailing \n */
-		if (lf->buf[lf->len - 1] != '\n')
-			lf->len++;
-		lf->buf = bmake_realloc(lf->buf, lf->len);
-		lf->buf[lf->len - 1] = '\n';
-	}
+	if (!Buf_EndsWith(&buf, '\n'))
+		Buf_AddByte(&buf, '\n');
 
-done:
 	if (path != NULL)
 		close(fd);
 
-	return lf;
+	{
+		struct loadedfile *lf = loadedfile_create(path,
+		    buf.data, buf.len);
+		Buf_Destroy(&buf, FALSE);
+		return lf;
+	}
 }
 
 /* old code */

Reply via email to