Re: [PATCH 08/11] pack-objects: create pack v4 tables

2013-09-09 Thread Junio C Hamano
Nicolas Pitre  writes:

> Is anyone still using --max-pack-size ?
>
> I'm wondering if producing multiple packs from pack-objects is really 
> useful these days.  If I remember correctly, this was created to allow 
> the archiving of large packs onto CDROMs or the like.

I thought this was more about using a packfile on smaller
(e.g. 32-bit) systems, but I may be mistaken.  2b84b5a8 (Introduce
the config variable pack.packSizeLimit, 2008-02-05) mentions
"filesystem constraints":

Introduce the config variable pack.packSizeLimit

"git pack-objects" has the option --max-pack-size to limit the
file size of the packs to a certain amount of bytes.  On
platforms where the pack file size is limited by filesystem
constraints, it is easy to forget this option, and this option
does not exist for "git gc" to begin with.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 08/11] pack-objects: create pack v4 tables

2013-09-09 Thread Nicolas Pitre
On Mon, 9 Sep 2013, Duy Nguyen wrote:

> On Sun, Sep 8, 2013 at 10:04 PM, Nguyễn Thái Ngọc Duy  
> wrote:
> > +static void prepare_sha1_table(void)
> > +{
> > +   unsigned i;
> > +   /*
> > +* This table includes SHA-1s that may not be present in the
> > +* pack. One of the use of such SHA-1 is for completing thin
> > +* packs, where index-pack does not need to add SHA-1 to the
> > +* table at completion time.
> > +*/
> > +   v4.all_objs = xmalloc(nr_objects * sizeof(*v4.all_objs));
> > +   v4.all_objs_nr = nr_objects;
> > +   for (i = 0; i < nr_objects; i++)
> > +   v4.all_objs[i] = objects[i].idx;
> > +   qsort(v4.all_objs, nr_objects, sizeof(*v4.all_objs),
> > + sha1_idx_sort);
> > +}
> > +
> 
> fwiw this is wrong. Even in the non-thin pack case, pack-objects could
> write multiple packs to disk and we need different sha-1 table for
> each one. The situation is worse for thin pack because not all
> preferred_base entries end up a real dependency in the final pack. I'm
> working on it..

Is anyone still using --max-pack-size ?

I'm wondering if producing multiple packs from pack-objects is really 
useful these days.  If I remember correctly, this was created to allow 
the archiving of large packs onto CDROMs or the like.

I'd be tempted to simply ignore this facility and get rid of its 
complexity if no one uses it.  Or assume that split packs will have 
inter dependencies.  Or they will be pack v2 only.


Nicolas


Re: [PATCH 08/11] pack-objects: create pack v4 tables

2013-09-09 Thread Duy Nguyen
On Sun, Sep 8, 2013 at 10:04 PM, Nguyễn Thái Ngọc Duy  wrote:
> +static void prepare_sha1_table(void)
> +{
> +   unsigned i;
> +   /*
> +* This table includes SHA-1s that may not be present in the
> +* pack. One of the use of such SHA-1 is for completing thin
> +* packs, where index-pack does not need to add SHA-1 to the
> +* table at completion time.
> +*/
> +   v4.all_objs = xmalloc(nr_objects * sizeof(*v4.all_objs));
> +   v4.all_objs_nr = nr_objects;
> +   for (i = 0; i < nr_objects; i++)
> +   v4.all_objs[i] = objects[i].idx;
> +   qsort(v4.all_objs, nr_objects, sizeof(*v4.all_objs),
> + sha1_idx_sort);
> +}
> +

fwiw this is wrong. Even in the non-thin pack case, pack-objects could
write multiple packs to disk and we need different sha-1 table for
each one. The situation is worse for thin pack because not all
preferred_base entries end up a real dependency in the final pack. I'm
working on it..
-- 
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/11] pack-objects: create pack v4 tables

2013-09-08 Thread Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 builtin/pack-objects.c | 87 --
 1 file changed, 85 insertions(+), 2 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index b38d3dc..69a22c1 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -18,6 +18,7 @@
 #include "refs.h"
 #include "streaming.h"
 #include "thread-utils.h"
+#include "packv4-create.h"
 
 static const char *pack_usage[] = {
N_("git pack-objects --stdout [options...] [< ref-list | < 
object-list]"),
@@ -61,6 +62,8 @@ static struct object_entry *objects;
 static struct pack_idx_entry **written_list;
 static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
 
+static struct packv4_tables v4;
+
 static int non_empty;
 static int reuse_delta = 1, reuse_object = 1;
 static int keep_unreachable, unpack_unreachable, include_tag;
@@ -2039,12 +2042,42 @@ static int add_ref_tag(const char *path, const unsigned 
char *sha1, int flag, vo
return 0;
 }
 
+static int sha1_idx_sort(const void *a_, const void *b_)
+{
+   const struct pack_idx_entry *a = a_;
+   const struct pack_idx_entry *b = b_;
+   return hashcmp(a->sha1, b->sha1);
+}
+
+static void prepare_sha1_table(void)
+{
+   unsigned i;
+   /*
+* This table includes SHA-1s that may not be present in the
+* pack. One of the use of such SHA-1 is for completing thin
+* packs, where index-pack does not need to add SHA-1 to the
+* table at completion time.
+*/
+   v4.all_objs = xmalloc(nr_objects * sizeof(*v4.all_objs));
+   v4.all_objs_nr = nr_objects;
+   for (i = 0; i < nr_objects; i++)
+   v4.all_objs[i] = objects[i].idx;
+   qsort(v4.all_objs, nr_objects, sizeof(*v4.all_objs),
+ sha1_idx_sort);
+}
+
 static void prepare_pack(int window, int depth)
 {
struct object_entry **delta_list;
uint32_t i, nr_deltas;
unsigned n;
 
+   if (pack_version == 4) {
+   sort_dict_entries_by_hits(v4.commit_ident_table);
+   sort_dict_entries_by_hits(v4.tree_path_table);
+   prepare_sha1_table();
+   }
+
get_object_details();
 
/*
@@ -2191,6 +2224,34 @@ static void read_object_list_from_stdin(void)
 
add_preferred_base_object(line+41);
add_object_entry(sha1, 0, line+41, 0);
+
+   if (pack_version == 4) {
+   void *data;
+   enum object_type type;
+   unsigned long size;
+   int (*add_dict_entries)(struct dict_table *, void *, 
unsigned long);
+   struct dict_table *dict;
+
+   switch (sha1_object_info(sha1, &size)) {
+   case OBJ_COMMIT:
+   add_dict_entries = add_commit_dict_entries;
+   dict = v4.commit_ident_table;
+   break;
+   case OBJ_TREE:
+   add_dict_entries = add_tree_dict_entries;
+   dict = v4.tree_path_table;
+   break;
+   default:
+   continue;
+   }
+   data = read_sha1_file(sha1, &type, &size);
+   if (!data)
+   die("cannot unpack %s", sha1_to_hex(sha1));
+   if (add_dict_entries(dict, data, size) < 0)
+   die("can't process %s object %s",
+   typename(type), sha1_to_hex(sha1));
+   free(data);
+   }
}
 }
 
@@ -2198,10 +2259,26 @@ static void read_object_list_from_stdin(void)
 
 static void show_commit(struct commit *commit, void *data)
 {
+   if (pack_version == 4) {
+   unsigned long size;
+   enum object_type type;
+   unsigned char *buf;
+
+   /* commit->buffer is NULL most of the time, don't bother */
+   buf = read_sha1_file(commit->object.sha1, &type, &size);
+   add_commit_dict_entries(v4.commit_ident_table, buf, size);
+   free(buf);
+   }
add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0);
commit->object.flags |= OBJECT_ADDED;
 }
 
+static void show_tree_entry(const struct name_entry *entry, void *data)
+{
+   dict_add_entry(v4.tree_path_table, entry->mode, entry->path,
+  tree_entry_len(entry));
+}
+
 static void show_object(struct object *obj,
const struct name_path *path, const char *last,
void *data)
@@ -2380,7 +2457,9 @@ static void get_object_list(int ac, const char **av)
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
mark_edges_uninteresting(revs.c