Module Name: src
Committed By: rillig
Date: Fri Oct 2 22:20:25 UTC 2020
Modified Files:
src/usr.bin/make: dir.c
Log Message:
make(1): use hash table for looking up open directories by name
As long as there are less than 20 open directories, it's perfectly fine
to use a doubly-linked list for name lookup. A singly linked list or
even an array list would have been better, but anyway.
When the number of directories rises above 1000, which happens with
dirdeps.mk, linear list lookup becomes too expensive, especially since
each list entry is compared using a strcmp call, in a callback function
that is not inlined.
Using a hash table is much more efficient than linear lookup. While
here, abstract all operations regarding the openDirectories list into a
new data type that provides a simple and straight-forward API. This
strongly typed API is especially important since the current
implementation of the list and hash table is weakly typed, using void *
for the actual data, and StringList and CachedDirList refer to the
exactly same type, they just have different names to help the human
readers but don't provide any type safety.
To generate a diff of this commit:
cvs rdiff -u -r1.154 -r1.155 src/usr.bin/make/dir.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/usr.bin/make/dir.c
diff -u src/usr.bin/make/dir.c:1.154 src/usr.bin/make/dir.c:1.155
--- src/usr.bin/make/dir.c:1.154 Thu Oct 1 22:42:00 2020
+++ src/usr.bin/make/dir.c Fri Oct 2 22:20:25 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: dir.c,v 1.154 2020/10/01 22:42:00 rillig Exp $ */
+/* $NetBSD: dir.c,v 1.155 2020/10/02 22:20:25 rillig Exp $ */
/*
* Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
@@ -136,7 +136,7 @@
#include "job.h"
/* "@(#)dir.c 8.2 (Berkeley) 1/2/94" */
-MAKE_RCSID("$NetBSD: dir.c,v 1.154 2020/10/01 22:42:00 rillig Exp $");
+MAKE_RCSID("$NetBSD: dir.c,v 1.155 2020/10/02 22:20:25 rillig Exp $");
#define DIR_DEBUG0(text) DEBUG0(DIR, text)
#define DIR_DEBUG1(fmt, arg1) DEBUG1(DIR, fmt, arg1)
@@ -219,7 +219,58 @@ typedef ListNode SearchPathNode;
SearchPath *dirSearchPath; /* main search path */
-static CachedDirList *openDirectories; /* the list of all open directories */
+/* A list of cached directories, with fast lookup by directory name. */
+typedef struct OpenDirs {
+ CachedDirList *list;
+ Hash_Table /* of CachedDirListNode */ table;
+} OpenDirs;
+
+static void
+OpenDirs_Init(OpenDirs *odirs)
+{
+ odirs->list = Lst_Init();
+ Hash_InitTable(&odirs->table);
+}
+
+static void MAKE_ATTR_UNUSED
+OpenDirs_Done(OpenDirs *odirs)
+{
+ Dir_ClearPath(odirs->list);
+ Lst_Free(odirs->list);
+ Hash_DeleteTable(&odirs->table);
+}
+
+static CachedDir *
+OpenDirs_Find(OpenDirs *odirs, const char *name)
+{
+ CachedDirListNode *ln = Hash_FindValue(&odirs->table, name);
+ return ln != NULL ? ln->datum : NULL;
+}
+
+static void
+OpenDirs_Add(OpenDirs *odirs, CachedDir *cdir)
+{
+ Hash_Entry *he = Hash_FindEntry(&odirs->table, cdir->name);
+ if (he != NULL)
+ return;
+ he = Hash_CreateEntry(&odirs->table, cdir->name, NULL);
+ Lst_Append(odirs->list, cdir);
+ Hash_SetValue(he, odirs->list->last);
+}
+
+static void
+OpenDirs_Remove(OpenDirs *odirs, const char *name)
+{
+ Hash_Entry *he = Hash_FindEntry(&odirs->table, name);
+ CachedDirListNode *ln;
+ if (he == NULL)
+ return;
+ ln = Hash_GetValue(he);
+ Hash_DeleteEntry(&odirs->table, he);
+ Lst_Remove(odirs->list, ln);
+}
+
+static OpenDirs openDirs; /* the list of all open directories */
/*
* Variables for gathering statistics on the efficiency of the hashing
@@ -337,7 +388,7 @@ void
Dir_Init(void)
{
dirSearchPath = Lst_Init();
- openDirectories = Lst_Init();
+ OpenDirs_Init(&openDirs);
Hash_InitTable(&mtimes);
Hash_InitTable(&lmtimes);
}
@@ -387,11 +438,8 @@ void
Dir_InitDot(void)
{
if (dot != NULL) {
- CachedDirListNode *ln;
-
- /* Remove old entry from openDirectories, but do not destroy. */
- ln = Lst_FindDatum(openDirectories, dot);
- Lst_Remove(openDirectories, ln);
+ /* Remove old entry from openDirs, but do not destroy. */
+ OpenDirs_Remove(&openDirs, dot->name);
}
dot = Dir_AddDir(NULL, ".");
@@ -424,8 +472,7 @@ Dir_End(void)
Dir_Destroy(dot);
Dir_ClearPath(dirSearchPath);
Lst_Free(dirSearchPath);
- Dir_ClearPath(openDirectories);
- Lst_Free(openDirectories);
+ OpenDirs_Done(&openDirs);
Hash_DeleteTable(&mtimes);
#endif
}
@@ -1482,13 +1529,12 @@ Dir_MTime(GNode *gn, Boolean recheck)
CachedDir *
Dir_AddDir(SearchPath *path, const char *name)
{
- SearchPathNode *ln = NULL;
CachedDir *dir = NULL; /* the added directory */
DIR *d;
struct dirent *dp;
if (path != NULL && strcmp(name, ".DOTLAST") == 0) {
- ln = Lst_Find(path, DirFindName, name);
+ SearchPathNode *ln = Lst_Find(path, DirFindName, name);
if (ln != NULL)
return LstNode_Datum(ln);
@@ -1497,9 +1543,8 @@ Dir_AddDir(SearchPath *path, const char
}
if (path != NULL)
- ln = Lst_Find(openDirectories, DirFindName, name);
- if (ln != NULL) {
- dir = LstNode_Datum(ln);
+ dir = OpenDirs_Find(&openDirs, name);
+ if (dir != NULL) {
if (Lst_FindDatum(path, dir) == NULL) {
dir->refCount++;
Lst_Append(path, dir);
@@ -1530,7 +1575,7 @@ Dir_AddDir(SearchPath *path, const char
(void)Hash_CreateEntry(&dir->files, dp->d_name, NULL);
}
(void)closedir(d);
- Lst_Append(openDirectories, dir);
+ OpenDirs_Add(&openDirs, dir);
if (path != NULL)
Lst_Append(path, dir);
}
@@ -1623,11 +1668,7 @@ Dir_Destroy(void *dirp)
dir->refCount--;
if (dir->refCount == 0) {
- CachedDirListNode *node;
-
- node = Lst_FindDatum(openDirectories, dir);
- if (node != NULL)
- Lst_Remove(openDirectories, node);
+ OpenDirs_Remove(&openDirs, dir->name);
Hash_DeleteTable(&dir->files);
free(dir->name);
@@ -1712,7 +1753,7 @@ Dir_PrintDirectories(void)
percentage(hits, hits + bigmisses + nearmisses));
debug_printf("# %-20s referenced\thits\n", "directory");
- for (ln = openDirectories->first; ln != NULL; ln = ln->next) {
+ for (ln = openDirs.list->first; ln != NULL; ln = ln->next) {
CachedDir *dir = ln->datum;
debug_printf("# %-20s %10d\t%4d\n", dir->name, dir->refCount,
dir->hits);