"git clean" uses resolve_gitlink_ref() to check for the presence of
nested git repositories, but it has the drawback of creating a
ref_cache entry for every directory that should potentially be
cleaned. The linear search through the ref_cache list causes a massive
performance hit for a large number of directories.

Modify clean.c:remove_dirs to use setup.c:is_git_directory and
setup.c:read_gitfile_gently instead.

Both these functions will open files and parse contents when they find
something that looks like a git repository. This is ok from a
performance standpoint since finding repository candidates should be
comparatively rare.

Using is_git_directory and read_gitfile_gently should give a more
standardized check for what is and what isn't a git repository, but
it also introduces a slight behavioral change. We will now detect and
respect empty nested git repositories (where only "git init" has been
run) and empty bare repositories that have been placed in a ".git"
directory. We will also no longer die when cleaning a file named
".git" with garbage content (it will be cleaned instead). Update
t7300 to reflect this.

The time to clean an untracked directory containing 100000
subdirectories went from 61s to 1.7s after this change.

Helped-by: Jeff King <p...@peff.net>
Signed-off-by: Erik Elfström <erik.elfst...@gmail.com>
---
 builtin/clean.c  | 25 +++++++++++++++++++++----
 t/t7300-clean.sh |  8 +++-----
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/builtin/clean.c b/builtin/clean.c
index 98c103f..5cda3c5 100644
--- a/builtin/clean.c
+++ b/builtin/clean.c
@@ -10,7 +10,6 @@
 #include "cache.h"
 #include "dir.h"
 #include "parse-options.h"
-#include "refs.h"
 #include "string-list.h"
 #include "quote.h"
 #include "column.h"
@@ -148,6 +147,26 @@ static int exclude_cb(const struct option *opt, const char *arg, int unset)
        return 0;
 }
 
+/*
+ * Return 1 if the given path is the root of a git repository or
+ * submodule else 0. Will not return 1 for bare repositories with the
+ * exception of creating a bare repository in "foo/.git" and calling
+ * is_git_repository("foo").
+ */
+static int is_git_repository(struct strbuf *path)
+{
+       int ret = 0;
+       size_t orig_path_len = path->len;
+       assert(orig_path_len != 0);
+       if (path->buf[orig_path_len - 1] != '/')
+               strbuf_addch(path, '/');
+       strbuf_addstr(path, ".git");
+       if (read_gitfile_gently(path->buf) || is_git_directory(path->buf))
+               ret = 1;
+       strbuf_setlen(path, orig_path_len);
+       return ret;
+}
+
 static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
                int dry_run, int quiet, int *dir_gone)
 {
@@ -155,13 +174,11 @@ static int remove_dirs(struct strbuf *path, const char *prefix, int force_flag,
        struct strbuf quoted = STRBUF_INIT;
        struct dirent *e;
        int res = 0, ret = 0, gone = 1, original_len = path->len, len;
-       unsigned char submodule_head[20];
        struct string_list dels = STRING_LIST_INIT_DUP;
 
        *dir_gone = 1;
 
-       if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) &&
-                       !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) {
+       if ((force_flag & REMOVE_DIR_KEEP_NESTED_GIT) && is_git_repository(path)) {
                if (!quiet) {
                        quote_path_relative(path->buf, prefix, &quoted);
                         printf(dry_run ?  _(msg_would_skip_git_dir) : _(msg_skip_git_dir),
diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index 4b9a72a..1bbb8ef 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -455,7 +455,7 @@ test_expect_success 'nested git work tree' '
        ! test -d bar
 '
 
-test_expect_failure 'should clean things that almost look like git but are not' '
+test_expect_success 'should clean things that almost look like git but are not' '
        rm -fr almost_git almost_bare_git almost_submodule &&
        mkdir -p almost_git/.git/objects &&
        mkdir -p almost_git/.git/refs &&
@@ -468,8 +468,6 @@ test_expect_failure 'should clean things that almost look like git but are not'
        garbage
        EOF
        test_when_finished "rm -rf almost_*" &&
-       ## This will fail due to die("Invalid gitfile format: %s", path); in
-       ## setup.c:read_gitfile.
        git clean -f -d &&
        test_path_is_missing almost_git &&
        test_path_is_missing almost_bare_git &&
@@ -501,7 +499,7 @@ test_expect_success 'should not clean submodules' '
        test_path_is_missing to_clean
 '
 
-test_expect_failure 'nested (empty) git should be kept' '
+test_expect_success 'nested (empty) git should be kept' '
        rm -fr foo bar &&
        git init foo &&
        mkdir bar &&
@@ -523,7 +521,7 @@ test_expect_success 'nested bare repositories should be cleaned' '
        test_path_is_missing subdir
 '
 
-test_expect_success 'nested (empty) bare repositories should be cleaned even when in .git' '
+test_expect_failure 'nested (empty) bare repositories should be cleaned even when in .git' '
        rm -fr strange_bare &&
        mkdir strange_bare &&
        git init --bare strange_bare/.git &&
-- 
2.4.0.rc2.5.g2871d5e

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to