Nguyễn Thái Ngọc Duy  <pclo...@gmail.com> writes:

> prepare_packed_git_one() is modified to allow count-objects to hook a
> report function to so we don't need to duplicate the pack searching
> logic in count-objects.c. When report_pack_garbage is NULL, the
> overhead is insignificant.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
> ---
>  Documentation/git-count-objects.txt |  4 +-
>  builtin/count-objects.c             | 18 ++++++++-
>  sha1_file.c                         | 81 
> +++++++++++++++++++++++++++++++++++--
>  3 files changed, 97 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/git-count-objects.txt 
> b/Documentation/git-count-objects.txt
> index e816823..1611d7c 100644
> --- a/Documentation/git-count-objects.txt
> +++ b/Documentation/git-count-objects.txt
> @@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
>  prune-packable: the number of loose objects that are also present in
>  the packs. These objects could be pruned using `git prune-packed`.
>  +
> -garbage: the number of files in loose object database that are not
> -valid loose objects
> +garbage: the number of files in object database that are not valid
> +loose objects nor valid packs
>  
>  GIT
>  ---
> diff --git a/builtin/count-objects.c b/builtin/count-objects.c
> index 9afaa88..118b2ae 100644
> --- a/builtin/count-objects.c
> +++ b/builtin/count-objects.c
> @@ -9,6 +9,20 @@
>  #include "builtin.h"
>  #include "parse-options.h"
>  
> +static unsigned long garbage;
> +
> +extern void (*report_pack_garbage)(const char *path, int len, const char 
> *name);
> +static void real_report_pack_garbage(const char *path, int len, const char 
> *name)
> +{

Don't some callers call this on paths outside objects/pack/
directory?  Is it still report-pack-garbage?

> +     if (len && name)
> +             error("garbage found: %.*s/%s", len, path, name);
> +     else if (!len && name)
> +             error("garbage found: %s%s", path, name);
> +     else
> +             error("garbage found: %s", path);
> +     garbage++;
> +}
> +
>  static void count_objects(DIR *d, char *path, int len, int verbose,
>                         unsigned long *loose,
>                         off_t *loose_size,
> @@ -76,7 +90,7 @@ int cmd_count_objects(int argc, const char **argv, const 
> char *prefix)
>       const char *objdir = get_object_directory();
>       int len = strlen(objdir);
>       char *path = xmalloc(len + 50);
> -     unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
> +     unsigned long loose = 0, packed = 0, packed_loose = 0;
>       off_t loose_size = 0;
>       struct option opts[] = {
>               OPT__VERBOSE(&verbose, N_("be verbose")),
> @@ -87,6 +101,8 @@ int cmd_count_objects(int argc, const char **argv, const 
> char *prefix)
>       /* we do not take arguments other than flags for now */
>       if (argc)
>               usage_with_options(count_objects_usage, opts);
> +     if (verbose)
> +             report_pack_garbage = real_report_pack_garbage;
>       memcpy(path, objdir, len);
>       if (len && objdir[len-1] != '/')
>               path[len++] = '/';
> diff --git a/sha1_file.c b/sha1_file.c
> index 40b2329..cc6ef03 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -21,6 +21,7 @@
>  #include "sha1-lookup.h"
>  #include "bulk-checkin.h"
>  #include "streaming.h"
> +#include "dir.h"
>  
>  #ifndef O_NOATIME
>  #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
> @@ -1000,6 +1001,54 @@ void install_packed_git(struct packed_git *pack)
>       packed_git = pack;
>  }
>  
> +/* A hook for count-objects to report invalid files in pack directory */
> +void (*report_pack_garbage)(const char *path, int len, const char *name);
> +
> +static const char *known_pack_extensions[] = { ".pack", ".keep", NULL };

This sounds wrong.  Isn't ".idx" also known?

> +static void report_garbage(struct string_list *list)
> +{
> +     struct strbuf sb = STRBUF_INIT;
> +     struct packed_git *p;
> +     int i;
> +
> +     if (!report_pack_garbage)
> +             return;
> +
> +     sort_string_list(list);
> +
> +     for (p = packed_git; p; p = p->next) {
> +             struct string_list_item *item;
> +             if (!p->pack_local)
> +                     continue;
> +             strbuf_reset(&sb);
> +             strbuf_add(&sb, p->pack_name,
> +                        strlen(p->pack_name) - strlen(".pack"));
> +             item = string_list_lookup(list, sb.buf);
> +             if (!item)
> +                     continue;
> +             /*
> +              * string_list_lookup does not guarantee to return the
> +              * first matched string if it's duplicated.
> +              */
> +             while (item - list->items &&
> +                    !strcmp(item[-1].string, item->string))
> +                     item--;
> +             while (item - list->items < list->nr &&
> +                    !strcmp(item->string, sb.buf)) {
> +                     item->util = NULL; /* non-garbage mark */
> +                     item++;
> +             }
> +     }
> +     for (i = 0; i < list->nr; i++) {
> +             struct string_list_item *item = list->items + i;
> +             if (!item->util)
> +                     continue;
> +             report_pack_garbage(item->string, 0, item->util);
> +     }
> +     strbuf_release(&sb);
> +}
> +
>  static void prepare_packed_git_one(char *objdir, int local)
>  {
>       /* Ensure that this buffer is large enough so that we can
> @@ -1009,6 +1058,7 @@ static void prepare_packed_git_one(char *objdir, int 
> local)
>       int len;
>       DIR *dir;
>       struct dirent *de;
> +     struct string_list garbage = STRING_LIST_INIT_DUP;
>  
>       sprintf(path, "%s/pack", objdir);
>       len = strlen(path);
> @@ -1024,14 +1074,37 @@ static void prepare_packed_git_one(char *objdir, int 
> local)
>               int namelen = strlen(de->d_name);
>               struct packed_git *p;
>  
> -             if (!has_extension(de->d_name, ".idx"))
> +             if (len + namelen + 1 > sizeof(path)) {
> +                     if (report_pack_garbage)
> +                             report_pack_garbage(path, len - 1, de->d_name);

A pack/in/a/very/long/path/pack-0000000000000000000000000000000000000000.pack
may pass when fed to "git verify-pack", but this will report it as "garbage",
without reporting what is wrong with it.  Wouldn't that confuse users?

>                       continue;
> +             }
> +
> +             strcpy(path + len, de->d_name);
>  
> -             if (len + namelen + 1 > sizeof(path))
> +             if (!has_extension(de->d_name, ".idx")) {
> +                     struct string_list_item *item;
> +                     int i, n;
> +                     if (!report_pack_garbage)
> +                             continue;
> +                     if (is_dot_or_dotdot(de->d_name))
> +                             continue;
> +                     for (i = 0; known_pack_extensions[i]; i++)
> +                             if (has_extension(de->d_name,
> +                                               known_pack_extensions[i]))
> +                                     break;
> +                     if (!known_pack_extensions[i]) {
> +                             report_pack_garbage(path, 0, NULL);
> +                             continue;
> +                     }
> +                     n = strlen(path) - strlen(known_pack_extensions[i]);
> +                     item = string_list_append_nodup(&garbage,
> +                                                     xstrndup(path, n));
> +                     item->util = (void*)known_pack_extensions[i];
>                       continue;
> +             }

Why isn't this part more like this?

                if (dot-or-dotdot) {
                        continue;
                } else if (has_extension(de->d_name, ".idx")) {
                        do things for the .idx file;
                } else if (has_extension(de->d_name, ".pack") {
                        do things for the .pack file, including
                        queuing the name if we haven't seen
                        corresponding .idx for later examination;
                } else if (has_extension(de->d_name, ".keep") {
                        nothing special for now but we may
                        want to add some other checks later
                } else {
                        everything else is a garbage
                        report_pack_garbage();
                }
                        

>  
>               /* Don't reopen a pack we already have. */
> -             strcpy(path + len, de->d_name);
>               for (p = packed_git; p; p = p->next) {
>                       if (!memcmp(path, p->pack_name, len + namelen - 4))
>                               break;
> @@ -1047,6 +1120,8 @@ static void prepare_packed_git_one(char *objdir, int 
> local)
>               install_packed_git(p);
>       }
>       closedir(dir);
> +     report_garbage(&garbage);
> +     string_list_clear(&garbage, 0);
>  }
>  
>  static int sort_pack(const void *a_, const void *b_)
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to