Mostly cleanups after Nico's comments. The diff against v2 is

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 4a24bc3..88340b5 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -22,8 +22,8 @@ struct object_entry {
        struct pack_idx_entry idx;
        unsigned long size;
        unsigned int hdr_size;
-       enum object_type type;
-       enum object_type real_type;
+       enum object_type type;  /* type as written in pack */
+       enum object_type real_type; /* type after delta resolving */
        unsigned delta_depth;
        int base_object_no;
        int nr_bases;           /* only valid for v4 trees */
@@ -194,8 +194,10 @@ static int mark_link(struct object *obj, int type, void 
*data)
        return 0;
 }
 
-/* The content of each linked object must have been checked
-   or it must be already present in the object database */
+/*
+ * The content of each linked object must have been checked or it must
+ * be already present in the object database
+ */
 static unsigned check_object(struct object *obj)
 {
        if (!obj)
@@ -289,6 +291,19 @@ static inline void *fill_and_use(int bytes)
        return p;
 }
 
+static void check_against_sha1table(const unsigned char *sha1)
+{
+       const unsigned char *found;
+       if (!packv4)
+               return;
+
+       found = bsearch(sha1, sha1_table, nr_objects, 20,
+                       (int (*)(const void *, const void *))hashcmp);
+       if (!found)
+               die(_("object %s not found in SHA-1 table"),
+                   sha1_to_hex(sha1));
+}
+
 static NORETURN void bad_object(unsigned long offset, const char *format,
                       ...) __attribute__((format (printf, 2, 3)));
 
@@ -325,15 +340,8 @@ static const unsigned char *read_sha1ref(void)
 static const unsigned char *read_sha1table_ref(void)
 {
        const unsigned char *sha1 = read_sha1ref();
-       if (sha1 < sha1_table || sha1 >= sha1_table + nr_objects * 20) {
-               unsigned char *found;
-               found = bsearch(sha1, sha1_table, nr_objects, 20,
-                               (int (*)(const void *, const void *))hashcmp);
-               if (!found)
-                       bad_object(consumed_bytes,
-                                  _("SHA-1 %s not found in SHA-1 table"),
-                                  sha1_to_hex(sha1));
-       }
+       if (sha1 < sha1_table || sha1 >= sha1_table + nr_objects * 20)
+               check_against_sha1table(sha1);
        return sha1;
 }
 
@@ -346,21 +354,6 @@ static const unsigned char *read_dictref(struct 
packv4_dict *dict)
        return  dict->data + dict->offsets[index];
 }
 
-static void *read_data(int size)
-{
-       const int max = sizeof(input_buffer);
-       void *buf;
-       char *p;
-       p = buf = xmalloc(size);
-       while (size) {
-               int to_fill = size > max ? max : size;
-               memcpy(p, fill_and_use(to_fill), to_fill);
-               p += to_fill;
-               size -= to_fill;
-       }
-       return buf;
-}
-
 static const char *open_pack_file(const char *pack_name)
 {
        if (from_stdin) {
@@ -532,8 +525,7 @@ static void read_and_inflate(unsigned long offset,
                git_SHA1_Final(sha1, ctx);
 }
 
-static void *unpack_commit_v4(unsigned int offset,
-                             unsigned long size,
+static void *unpack_commit_v4(unsigned int offset, unsigned long size,
                              unsigned char *sha1)
 {
        unsigned int nb_parents;
@@ -622,7 +614,8 @@ static void add_tree_delta_base(struct object_entry *obj,
  * v4 trees are actually kind of deltas and we don't do delta in the
  * first pass. This function only walks through a tree object to find
  * the end offset, register object dependencies and performs limited
- * validation.
+ * validation. For v4 trees that have no dependencies, we do
+ * uncompress and calculate their SHA-1.
  */
 static void *unpack_tree_v4(struct object_entry *obj,
                            unsigned int offset, unsigned long size,
@@ -641,9 +634,9 @@ static void *unpack_tree_v4(struct object_entry *obj,
                                add_tree_delta_base(obj, last_base, 
delta_start);
                        } else if (!last_base)
                                bad_object(offset,
-                                          _("bad copy count index in 
unpack_tree_v4"));
+                                          _("missing delta base 
unpack_tree_v4"));
                        copy_count >>= 1;
-                       if (!copy_count)
+                       if (!copy_count || copy_count > nr)
                                bad_object(offset,
                                           _("bad copy count index in 
unpack_tree_v4"));
                        nr -= copy_count;
@@ -657,6 +650,13 @@ static void *unpack_tree_v4(struct object_entry *obj,
                        entry_sha1 = read_sha1ref();
                        nr--;
 
+                       /*
+                        * Attempt to rebuild a canonical (base) tree.
+                        * If last_base is set, this tree depends on
+                        * another tree, which we have no access at this
+                        * stage, so reconstruction must be delayed until
+                        * the second pass.
+                        */
                        if (!last_base) {
                                const unsigned char *path;
                                unsigned mode;
@@ -694,6 +694,11 @@ static void *unpack_tree_v4(struct object_entry *obj,
        }
 }
 
+/*
+ * Unpack an entry data in the streamed pack, calculate the object
+ * SHA-1 if it's not a large blob. Otherwise just try to inflate the
+ * object to /dev/null to determine the end of the entry in the pack.
+ */
 static void *unpack_entry_data(struct object_entry *obj, unsigned char *sha1)
 {
        static char fixed_buf[8192];
@@ -799,19 +804,23 @@ static void *unpack_raw_entry(struct object_entry *obj,
        return data;
 }
 
+/*
+ * Some checks are skipped because they are already done by
+ * unpack_tree_v4() in the first pass.
+ */
 static void *patch_one_base_tree(const struct object_entry *src,
                                 const unsigned char *src_buf,
                                 const unsigned char *delta_buf,
                                 unsigned long delta_size,
                                 unsigned long *dst_size)
 {
-       unsigned int nr;
+       int nr;
        const unsigned char *last_base = NULL;
        struct strbuf sb = STRBUF_INIT;
        const unsigned char *p = delta_buf;
 
        nr = decode_varint(&p);
-       while (nr && p < delta_buf + delta_size) {
+       while (nr > 0 && p < delta_buf + delta_size) {
                unsigned int copy_start_or_path = decode_varint(&p);
                if (copy_start_or_path & 1) { /* copy_start */
                        struct tree_desc desc;
@@ -829,11 +838,9 @@ static void *patch_one_base_tree(const struct object_entry 
*src,
                                        last_base = sha1_table + (id - 1) * 20;
                                if (hashcmp(last_base, src->idx.sha1))
                                        die(_("bad tree base in 
patch_one_base_tree"));
-                       } else if (!last_base)
-                               die(_("bad copy count index in 
patch_one_base_tree"));
+                       }
+
                        copy_count >>= 1;
-                       if (!copy_count)
-                               die(_("bad copy count index in 
patch_one_base_tree"));
                        nr -= copy_count;
 
                        init_tree_desc(&desc, src_buf, src->size);
@@ -841,7 +848,8 @@ static void *patch_one_base_tree(const struct object_entry 
*src,
                                if (copy_start)
                                        copy_start--;
                                else if (copy_count) {
-                                       strbuf_addf(&sb, "%o %s%c", entry.mode, 
entry.path, '\0');
+                                       strbuf_addf(&sb, "%o %s%c",
+                                                   entry.mode, entry.path, 
'\0');
                                        strbuf_add(&sb, entry.sha1, 20);
                                        copy_count--;
                                } else
@@ -854,8 +862,6 @@ static void *patch_one_base_tree(const struct object_entry 
*src,
                        unsigned int id;
                        const unsigned char *entry_sha1;
 
-                       if (path_idx >= path_dict->nb_entries)
-                               die(_("bad path index in unpack_tree_v4"));
                        id = decode_varint(&p);
                        if (!id) {
                                entry_sha1 = p;
@@ -876,6 +882,11 @@ static void *patch_one_base_tree(const struct object_entry 
*src,
        return sb.buf;
 }
 
+/*
+ * Unpack entry data in the second pass when the pack is already
+ * stored on disk. consume call back is used for large-blob case. Must
+ * be thread safe.
+ */
 static void *unpack_data(struct object_entry *obj,
                         int (*consume)(const unsigned char *, unsigned long, 
void *),
                         void *cb_data)
@@ -1079,19 +1090,6 @@ static int check_collison(struct object_entry *entry)
        return 0;
 }
 
-static void check_against_sha1table(struct object_entry *obj)
-{
-       const unsigned char *found;
-       if (!packv4)
-               return;
-
-       found = bsearch(obj->idx.sha1, sha1_table, nr_objects, 20,
-                       (int (*)(const void *, const void *))hashcmp);
-       if (!found)
-               die(_("object %s not found in SHA-1 table"),
-                   sha1_to_hex(obj->idx.sha1));
-}
-
 static void sha1_object(const void *data, struct object_entry *obj_entry,
                        unsigned long size, enum object_type type,
                        const unsigned char *sha1)
@@ -1288,7 +1286,7 @@ static void resolve_delta(struct object_entry *delta_obj,
                bad_object(delta_obj->idx.offset, _("failed to apply delta"));
        hash_sha1_file(result->data, result->size,
                       typename(delta_obj->real_type), delta_obj->idx.sha1);
-       check_against_sha1table(delta_obj);
+       check_against_sha1table(delta_obj->idx.sha1);
        sha1_object(result->data, NULL, result->size, delta_obj->real_type,
                    delta_obj->idx.sha1);
        counter_lock();
@@ -1296,6 +1294,11 @@ static void resolve_delta(struct object_entry *delta_obj,
        counter_unlock();
 }
 
+/*
+ * Given a base object, search for all objects depending on the base,
+ * try to unpack one of those object. The function will be called
+ * repeatedly until all objects are unpacked.
+ */
 static struct base_data *find_unresolved_deltas_1(struct base_data *base,
                                                  struct base_data *prev_base)
 {
@@ -1408,6 +1411,10 @@ static int compare_delta_entry(const void *a, const void 
*b)
                                   objects[delta_b->obj_no].type);
 }
 
+/*
+ * Unpack all objects depending directly or indirectly on the given
+ * object
+ */
 static void resolve_base(struct object_entry *obj)
 {
        struct base_data *base_obj = alloc_base_data();
@@ -1417,6 +1424,7 @@ static void resolve_base(struct object_entry *obj)
 }
 
 #ifndef NO_PTHREADS
+/* Call resolve_base() in multiple threads */
 static void *threaded_second_pass(void *data)
 {
        set_thread_data(data);
@@ -1460,10 +1468,19 @@ static struct packv4_dict *read_dict(void)
 
 static void parse_dictionaries(void)
 {
+       int i;
        if (!packv4)
                return;
 
-       sha1_table = read_data(20 * nr_objects);
+       sha1_table = xmalloc(20 * nr_objects);
+       hashcpy(sha1_table, fill_and_use(20));
+       for (i = 1; i < nr_objects; i++) {
+               unsigned char *p = sha1_table + i * 20;
+               hashcpy(p, fill_and_use(20));
+               if (hashcmp(p - 20, p) >= 0)
+                       die(_("wrong order in SHA-1 table at entry %d"), i);
+       }
+
        name_dict = read_dict();
        path_dict = read_dict();
 }
@@ -1492,9 +1509,9 @@ static void parse_pack_objects(unsigned char *sha1)
                        /* large blobs, check later */
                        obj->real_type = OBJ_BAD;
                        nr_delays++;
-                       check_against_sha1table(obj);
+                       check_against_sha1table(obj->idx.sha1);
                } else {
-                       check_against_sha1table(obj);
+                       check_against_sha1table(obj->idx.sha1);
                        sha1_object(data, NULL, obj->size, obj->real_type,
                                    obj->idx.sha1);
                }
@@ -2137,14 +2154,8 @@ int cmd_index_pack(int argc, const char **argv, const 
char *prefix)
        free(index_name_buf);
        free(keep_name_buf);
        free(sha1_table);
-       if (name_dict) {
-               free((void*)name_dict->data);
-               free(name_dict);
-       }
-       if (path_dict) {
-               free((void*)path_dict->data);
-               free(path_dict);
-       }
+       pv4_free_dict(name_dict);
+       pv4_free_dict(path_dict);
        if (pack_name == NULL)
                free((void *) curr_pack);
        if (index_name == NULL)
diff --git a/packv4-parse.c b/packv4-parse.c
index 82661ba..d515bb9 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -63,6 +63,14 @@ struct packv4_dict *pv4_create_dict(const unsigned char 
*data, int dict_size)
        return dict;
 }
 
+void pv4_free_dict(struct packv4_dict *dict)
+{
+       if (dict) {
+               free((void*)dict->data);
+               free(dict);
+       }
+}
+
 static struct packv4_dict *load_dict(struct packed_git *p, off_t *offset)
 {
        struct pack_window *w_curs = NULL;
diff --git a/packv4-parse.h b/packv4-parse.h
index 0b2405a..e6719f6 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -8,6 +8,7 @@ struct packv4_dict {
 };
 
 struct packv4_dict *pv4_create_dict(const unsigned char *data, int dict_size);
+void pv4_free_dict(struct packv4_dict *dict);
 
 void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
                     off_t offset, unsigned long size);
diff --git a/sha1_file.c b/sha1_file.c
index c7bf677..1528e28 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -763,6 +763,8 @@ void free_pack_by_name(const char *pack_name)
                        }
                        close_pack_index(p);
                        free(p->bad_object_sha1);
+                       pv4_free_dict(p->ident_dict);
+                       pv4_free_dict(p->path_dict);
                        *pp = p->next;
                        if (last_found_pack == p)
                                last_found_pack = NULL;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to