The File System Excludes module is a new programmatic way to exclude files and
folders from git's traversal of the working directory.  fsexcludes_init() should
be called with a string buffer that contains a NUL-separated list of the path
names of the files and/or directories that should be included.  Any path not
listed will be excluded.  The paths should be relative to the root of the
working directory and be separated by a single NUL.

The excludes logic in dir.c has been updated to honor the results of
fsexcludes_is_excluded_from().  If fsexcludes does not exclude the file, the
normal excludes logic is also checked as it could further reduce the set of
files that should be included.

Signed-off-by: Ben Peart <benpe...@microsoft.com>
---
 Makefile     |   1 +
 dir.c        |  23 +++++-
 fsexcludes.c | 211 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fsexcludes.h |  29 +++++++
 4 files changed, 262 insertions(+), 2 deletions(-)
 create mode 100644 fsexcludes.c
 create mode 100644 fsexcludes.h

diff --git a/Makefile b/Makefile
index 96f6138f63..c102d2f75a 100644
--- a/Makefile
+++ b/Makefile
@@ -819,6 +819,7 @@ LIB_OBJS += exec_cmd.o
 LIB_OBJS += fetch-object.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
+LIB_OBJS += fsexcludes.o
 LIB_OBJS += fsmonitor.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
diff --git a/dir.c b/dir.c
index 63a917be45..1aa639b9f4 100644
--- a/dir.c
+++ b/dir.c
@@ -18,6 +18,7 @@
 #include "utf8.h"
 #include "varint.h"
 #include "ewah/ewok.h"
+#include "fsexcludes.h"
 #include "fsmonitor.h"
 
 /*
@@ -1102,6 +1103,12 @@ int is_excluded_from_list(const char *pathname,
                          struct exclude_list *el, struct index_state *istate)
 {
        struct exclude *exclude;
+
+       if (*dtype == DT_UNKNOWN)
+               *dtype = get_dtype(NULL, istate, pathname, pathlen);
+       if (fsexcludes_is_excluded_from(istate, pathname, pathlen, *dtype) > 0)
+               return 1;
+
        exclude = last_exclude_matching_from_list(pathname, pathlen, basename,
                                                  dtype, el, istate);
        if (exclude)
@@ -1317,8 +1324,15 @@ struct exclude *last_exclude_matching(struct dir_struct *dir,
 int is_excluded(struct dir_struct *dir, struct index_state *istate,
                const char *pathname, int *dtype_p)
 {
-       struct exclude *exclude =
-               last_exclude_matching(dir, istate, pathname, dtype_p);
+       struct exclude *exclude;
+       int pathlen = strlen(pathname);
+
+       if (*dtype_p == DT_UNKNOWN)
+               *dtype_p = get_dtype(NULL, istate, pathname, pathlen);
+       if (fsexcludes_is_excluded_from(istate, pathname, pathlen, *dtype_p) > 
0)
+               return 1;
+
+       exclude = last_exclude_matching(dir, istate, pathname, dtype_p);
        if (exclude)
                return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
        return 0;
@@ -1671,6 +1685,9 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
        if (dtype != DT_DIR && has_path_in_index)
                return path_none;
 
+       if (fsexcludes_is_excluded_from(istate, path->buf, path->len, dtype) > 
0)
+               return path_excluded;
+
        /*
         * When we are looking at a directory P in the working tree,
         * there are three cases:
@@ -2011,6 +2028,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                /* add the path to the appropriate result list */
                switch (state) {
                case path_excluded:
+                       if (fsexcludes_is_excluded_from(istate, path.buf, 
path.len, DTYPE(cdir.de)) > 0)
+                               break;
                        if (dir->flags & DIR_SHOW_IGNORED)
                                dir_add_name(dir, istate, path.buf, path.len);
                        else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
diff --git a/fsexcludes.c b/fsexcludes.c
new file mode 100644
index 0000000000..0ef57f107b
--- /dev/null
+++ b/fsexcludes.c
@@ -0,0 +1,211 @@
+#include "cache.h"
+#include "fsexcludes.h"
+#include "hashmap.h"
+#include "strbuf.h"
+
+static int fsexcludes_initialized = 0; /* set by fsexcludes_init(), cleared by fsexcludes_free() */
+static struct strbuf fsexcludes_data = STRBUF_INIT; /* path list taken over from the strbuf given to fsexcludes_init() */
+static struct hashmap fsexcludes_hashmap; /* consulted for DT_REG paths; built lazily in fsexcludes_is_excluded_from() */
+static struct hashmap parent_directory_hashmap; /* consulted for DT_DIR and DT_LNK paths; built lazily as well */
+
+struct fsexcludes {
+       struct hashmap_entry ent; /* must be the first member! */
+       const char *pattern; /* path bytes; stored as given, never copied by this file */
+       int patternlen; /* number of bytes of pattern that are hashed and compared */
+};
+
+static unsigned int(*fsexcludeshash)(const void *buf, size_t len); /* memihash or memhash, picked from ignore_case when a map is built */
+static int(*fsexcludescmp)(const char *a, const char *b, size_t len); /* strncasecmp or strncmp, picked from ignore_case when a map is built */
+
+/*
+ * hashmap comparison callback for `struct fsexcludes` entries.
+ *
+ * Returns 0 when `a` and `b` hold the same pattern and non-zero otherwise.
+ * `unused_cmp_data` and `key` are ignored.
+ *
+ * The lengths are compared first: the patterns are not NUL-terminated at
+ * patternlen (keys are prefixes of a longer path), so comparing only
+ * fse1->patternlen bytes would let "a/" match a longer entry such as "a/bc"
+ * whenever their hashes happen to collide.
+ */
+static int fsexcludes_hashmap_cmp(const void *unused_cmp_data,
+       const void *a, const void *b, const void *key)
+{
+       const struct fsexcludes *fse1 = a;
+       const struct fsexcludes *fse2 = b;
+
+       if (fse1->patternlen != fse2->patternlen)
+               return 1;
+
+       return fsexcludescmp(fse1->pattern, fse2->pattern, fse1->patternlen);
+}
+
+/*
+ * Look up the first `patternlen` bytes of `pattern` in `map`.
+ *
+ * Returns 0 when the path itself is present in the map, or when any leading
+ * part of it that ends in '/' is present; returns 1 when nothing matches.
+ */
+static int check_fsexcludes_hashmap(struct hashmap *map, const char *pattern,
+       int patternlen)
+{
+       struct strbuf path = STRBUF_INIT;
+       struct fsexcludes key;
+       const char *sep;
+       int result = 1;
+
+       /* Work on a private copy so strchr() below cannot run past patternlen. */
+       strbuf_add(&path, pattern, patternlen);
+       key.pattern = path.buf;
+
+       /* First try the path exactly as given. */
+       key.patternlen = path.len;
+       hashmap_entry_init(&key, fsexcludeshash(key.pattern, key.patternlen));
+       if (hashmap_get(map, &key, NULL)) {
+               result = 0;
+               goto done;
+       }
+
+       /*
+        * Then try every leading directory, trailing slash included.  For
+        * 'a/b/foo.txt' this looks up 'a/' and then 'a/b/'.
+        */
+       for (sep = strchr(path.buf, '/'); sep; sep = strchr(sep + 1, '/')) {
+               key.patternlen = sep - path.buf + 1;
+               hashmap_entry_init(&key,
+                                  fsexcludeshash(key.pattern, key.patternlen));
+               if (hashmap_get(map, &key, NULL)) {
+                       result = 0;
+                       break;
+               }
+       }
+
+done:
+       strbuf_release(&path);
+       return result;
+}
+
+/*
+ * Allocate an entry for the first `patternlen` bytes of `pattern` and add it
+ * to `map`.  Only the pointer is stored; the bytes are not copied, so the
+ * caller must keep them alive for as long as the entry is in the map.
+ */
+static void fsexcludes_hashmap_add(struct hashmap *map, const char *pattern,
+       const int patternlen)
+{
+       struct fsexcludes *entry = xmalloc(sizeof(*entry));
+
+       entry->pattern = pattern;
+       entry->patternlen = patternlen;
+       hashmap_entry_init(entry, fsexcludeshash(pattern, patternlen));
+       hashmap_add(map, entry);
+}
+
+/*
+ * Initialize `map` and fill it with one entry per path found in
+ * `fsexcludes_data`, a list of path names separated by NUL bytes.
+ *
+ * The entries point into fsexcludes_data->buf, so that buffer must outlive
+ * the map.  Also selects the hash and compare functions according to
+ * ignore_case.
+ */
+static void initialize_fsexcludes_hashmap(struct hashmap *map,
+       struct strbuf *fsexcludes_data)
+{
+       char *buf, *entry;
+       size_t len, i; /* size_t: the index is compared against strbuf's size_t len */
+
+       /*
+        * Build a hashmap of the fsexcludes data we can use to look
+        * for cache entry matches quickly
+        */
+       fsexcludeshash = ignore_case ? memihash : memhash;
+       fsexcludescmp = ignore_case ? strncasecmp : strncmp;
+       hashmap_init(map, fsexcludes_hashmap_cmp, NULL, 0);
+
+       entry = buf = fsexcludes_data->buf;
+       len = fsexcludes_data->len;
+       for (i = 0; i < len; i++) {
+               if (buf[i] == '\0') {
+                       fsexcludes_hashmap_add(map, entry, buf + i - entry);
+                       entry = buf + i + 1;
+               }
+       }
+
+       /*
+        * The list is documented as NUL-separated, so the last path may not
+        * be followed by a NUL inside the buffer.  Add it as well instead of
+        * silently dropping it.  When the data does end in a NUL, entry is
+        * already at buf + len and nothing is added here.
+        */
+       if (entry < buf + len)
+               fsexcludes_hashmap_add(map, entry, buf + len - entry);
+}
+
+/*
+ * Add every leading directory of `pattern` (each one including its trailing
+ * '/') to `map`, skipping directories that are already present.
+ *
+ * The excludes logic has to match the directories leading up to a file as
+ * well as the file itself, and many files share the same parents, so
+ * duplicates are expected to be common.  Patterns of at most one byte are
+ * ignored, and a '/' in the very first byte is never treated as a separator.
+ */
+static void parent_directory_hashmap_add(struct hashmap *map,
+       const char *pattern, const int patternlen)
+{
+       const char *sep;
+
+       if (patternlen <= 1)
+               return;
+
+       for (sep = strchr(pattern + 1, '/'); sep; sep = strchr(sep + 1, '/')) {
+               struct fsexcludes probe, *dir;
+               unsigned int hash;
+
+               probe.pattern = pattern;
+               probe.patternlen = sep - pattern + 1;
+               hash = fsexcludeshash(probe.pattern, probe.patternlen);
+               hashmap_entry_init(&probe, hash);
+
+               /* Already recorded: nothing to allocate. */
+               if (hashmap_get(map, &probe, NULL))
+                       continue;
+
+               dir = xmalloc(sizeof(*dir));
+               dir->pattern = probe.pattern;
+               dir->patternlen = probe.patternlen;
+               hashmap_entry_init(dir, hash);
+               hashmap_add(map, dir);
+       }
+}
+
+/*
+ * Initialize `map` and fill it with the parent directories of every path
+ * found in `vfs_data`, a list of path names separated by NUL bytes.
+ *
+ * The entries point into vfs_data->buf, so that buffer must outlive the map.
+ * Also selects the hash and compare functions according to ignore_case.
+ */
+static void initialize_parent_directory_hashmap(struct hashmap *map,
+       struct strbuf *vfs_data)
+{
+       char *buf, *entry;
+       size_t len, i; /* size_t: the index is compared against strbuf's size_t len */
+
+       /*
+        * Build a hashmap of the parent directories contained in the
+        * fsexcludes data we can use to look for matches quickly
+        */
+       fsexcludeshash = ignore_case ? memihash : memhash;
+       fsexcludescmp = ignore_case ? strncasecmp : strncmp;
+       hashmap_init(map, fsexcludes_hashmap_cmp, NULL, 0);
+
+       entry = buf = vfs_data->buf;
+       len = vfs_data->len;
+       for (i = 0; i < len; i++) {
+               if (buf[i] == '\0') {
+                       parent_directory_hashmap_add(map, entry,
+                                                    buf + i - entry);
+                       entry = buf + i + 1;
+               }
+       }
+
+       /*
+        * The list is documented as NUL-separated, so the last path may not
+        * be followed by a NUL inside the buffer; process it too.  A strbuf
+        * keeps a NUL at buf[len], which is what stops the strchr() scan in
+        * parent_directory_hashmap_add() for this final entry.
+        */
+       if (entry < buf + len)
+               parent_directory_hashmap_add(map, entry, buf + len - entry);
+}
+
+/*
+ * Look up the first `pathlen` bytes of `pathname`, with a '/' appended, in
+ * `map`.  Returns 0 when that directory name is present and 1 when it is not.
+ */
+static int check_directory_hashmap(struct hashmap *map, const char *pathname,
+       int pathlen)
+{
+       struct strbuf dir = STRBUF_INIT;
+       struct fsexcludes key;
+       int result;
+
+       /* Entries in the map carry a trailing slash, so the key must too. */
+       strbuf_add(&dir, pathname, pathlen);
+       strbuf_addch(&dir, '/');
+
+       key.pattern = dir.buf;
+       key.patternlen = dir.len;
+       hashmap_entry_init(&key, fsexcludeshash(key.pattern, key.patternlen));
+       result = hashmap_get(map, &key, NULL) ? 0 : 1;
+
+       strbuf_release(&dir);
+       return result;
+}
+
+/*
+ * Return 1 for exclude, 0 for include and -1 for undecided.
+ *
+ * The result is undecided when fsexcludes_init() has not been called, or
+ * when `dtype` is anything other than DT_REG, DT_DIR or DT_LNK.  DT_REG
+ * paths are looked up in the map of listed paths; DT_DIR and DT_LNK paths
+ * are looked up in the map of parent directories.  `istate` is not used.
+ */
+int fsexcludes_is_excluded_from(struct index_state *istate,
+       const char *pathname, int pathlen, int dtype)
+{
+       if (!fsexcludes_initialized)
+               return -1;
+
+       if (dtype == DT_REG) {
+               /*
+                * Lazily init the hashmap.  The maps are initialized with a
+                * NULL cmpfn_data, so that field can never tell whether the
+                * map has been built; testing it rebuilt (and leaked) the map
+                * on every call.  tablesize is zero until hashmap_init() has
+                * run and again after hashmap_free().
+                */
+               if (!fsexcludes_hashmap.tablesize)
+                       initialize_fsexcludes_hashmap(&fsexcludes_hashmap,
+                                                     &fsexcludes_data);
+
+               return check_fsexcludes_hashmap(&fsexcludes_hashmap,
+                                               pathname, pathlen);
+       }
+
+       if (dtype == DT_DIR || dtype == DT_LNK) {
+               /* lazily init the hashmap; see the note on tablesize above */
+               if (!parent_directory_hashmap.tablesize)
+                       initialize_parent_directory_hashmap(
+                               &parent_directory_hashmap, &fsexcludes_data);
+
+               return check_directory_hashmap(&parent_directory_hashmap,
+                                              pathname, pathlen);
+       }
+
+       return -1;
+}
+
+/*
+ * Take ownership of the path list in `sb` and enable fsexcludes checks.
+ *
+ * On return `sb` is empty and no longer refers to the data.  Calling this
+ * again replaces the previous list.
+ */
+void fsexcludes_init(struct strbuf *sb)
+{
+       /*
+        * Drop anything left from an earlier call so that the old buffer is
+        * not leaked and the maps are rebuilt from the new data.  This is a
+        * no-op the first time around.
+        */
+       fsexcludes_free();
+
+       /*
+        * Move the buffer over and reset the caller's strbuf directly.
+        * strbuf_detach() is not used because it returns the buffer for the
+        * caller to own; discarding that return value leaks a fresh
+        * allocation when sb has never been allocated.
+        */
+       fsexcludes_data = *sb;
+       strbuf_init(sb, 0);
+
+       fsexcludes_initialized = 1;
+}
+
+/*
+ * Disable fsexcludes checks and release the path list and both lookup maps.
+ * The maps are freed together with their entries (second argument 1).
+ */
+void fsexcludes_free(void)
+{
+       fsexcludes_initialized = 0;
+       hashmap_free(&parent_directory_hashmap, 1);
+       hashmap_free(&fsexcludes_hashmap, 1);
+       strbuf_release(&fsexcludes_data);
+}
diff --git a/fsexcludes.h b/fsexcludes.h
new file mode 100644
index 0000000000..10246daa02
--- /dev/null
+++ b/fsexcludes.h
@@ -0,0 +1,29 @@
+#ifndef FSEXCLUDES_H
+#define FSEXCLUDES_H
+
+/* Only pointers to these are used here, so forward declarations suffice. */
+struct index_state;
+struct strbuf;
+
+/*
+ * The file system excludes functions provide a way to programmatically limit
+ * where git will scan for untracked files.  This is used to speed up the
+ * scan by avoiding scanning parts of the work directory that do not have
+ * any new files.
+ */
+
+/*
+ * sb should contain a NUL separated list of path names of the files
+ * and/or directories that should be checked.  Any path not listed will
+ * be excluded from the scan.
+ *
+ * NOTE: fsexcludes_init() will take ownership of the storage passed in
+ * sb and will reset sb to `STRBUF_INIT`
+ */
+void fsexcludes_init(struct strbuf *sb);
+
+/*
+ * Release the storage taken over by fsexcludes_init() and turn the
+ * fsexcludes checks off again.
+ */
+void fsexcludes_free(void);
+
+/*
+ * Return 1 for exclude, 0 for include and -1 for undecided.
+ *
+ * pathname does not need to be NUL terminated; only the first pathlen
+ * bytes are used.  dtype is one of the DT_* values and is passed by value.
+ */
+int fsexcludes_is_excluded_from(struct index_state *istate,
+       const char *pathname, int pathlen, int dtype);
+
+
+#endif
-- 
2.17.0.windows.1

Reply via email to