Mostly cleanups after Nico's comments. The diff against v2 is:
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 4a24bc3..88340b5 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -22,8 +22,8 @@ struct object_entry {
struct pack_idx_entry idx;
unsigned long size;
unsigned int hdr_size;
- enum object_type type;
- enum object_type real_type;
+ enum object_type type; /* type as written in pack */
+ enum object_type real_type; /* type after delta resolving */
unsigned delta_depth;
int base_object_no;
int nr_bases; /* only valid for v4 trees */
@@ -194,8 +194,10 @@ static int mark_link(struct object *obj, int type, void *data)
return 0;
}
-/* The content of each linked object must have been checked
- or it must be already present in the object database */
+/*
+ * The content of each linked object must have been checked or it must
+ * be already present in the object database
+ */
static unsigned check_object(struct object *obj)
{
if (!obj)
@@ -289,6 +291,19 @@ static inline void *fill_and_use(int bytes)
return p;
}
+static void check_against_sha1table(const unsigned char *sha1)
+{
+ const unsigned char *found;
+ if (!packv4)
+ return;
+
+ found = bsearch(sha1, sha1_table, nr_objects, 20,
+ (int (*)(const void *, const void *))hashcmp);
+ if (!found)
+ die(_("object %s not found in SHA-1 table"),
+ sha1_to_hex(sha1));
+}
+
static NORETURN void bad_object(unsigned long offset, const char *format,
...) __attribute__((format (printf, 2, 3)));
@@ -325,15 +340,8 @@ static const unsigned char *read_sha1ref(void)
static const unsigned char *read_sha1table_ref(void)
{
const unsigned char *sha1 = read_sha1ref();
- if (sha1 < sha1_table || sha1 >= sha1_table + nr_objects * 20) {
- unsigned char *found;
- found = bsearch(sha1, sha1_table, nr_objects, 20,
- (int (*)(const void *, const void *))hashcmp);
- if (!found)
- bad_object(consumed_bytes,
- _("SHA-1 %s not found in SHA-1 table"),
- sha1_to_hex(sha1));
- }
+ if (sha1 < sha1_table || sha1 >= sha1_table + nr_objects * 20)
+ check_against_sha1table(sha1);
return sha1;
}
@@ -346,21 +354,6 @@ static const unsigned char *read_dictref(struct packv4_dict *dict)
return dict->data + dict->offsets[index];
}
-static void *read_data(int size)
-{
- const int max = sizeof(input_buffer);
- void *buf;
- char *p;
- p = buf = xmalloc(size);
- while (size) {
- int to_fill = size > max ? max : size;
- memcpy(p, fill_and_use(to_fill), to_fill);
- p += to_fill;
- size -= to_fill;
- }
- return buf;
-}
-
static const char *open_pack_file(const char *pack_name)
{
if (from_stdin) {
@@ -532,8 +525,7 @@ static void read_and_inflate(unsigned long offset,
git_SHA1_Final(sha1, ctx);
}
-static void *unpack_commit_v4(unsigned int offset,
- unsigned long size,
+static void *unpack_commit_v4(unsigned int offset, unsigned long size,
unsigned char *sha1)
{
unsigned int nb_parents;
@@ -622,7 +614,8 @@ static void add_tree_delta_base(struct object_entry *obj,
* v4 trees are actually kind of deltas and we don't do delta in the
* first pass. This function only walks through a tree object to find
* the end offset, register object dependencies and performs limited
- * validation.
+ * validation. For v4 trees that have no dependencies, we do
+ * uncompress and calculate their SHA-1.
*/
static void *unpack_tree_v4(struct object_entry *obj,
unsigned int offset, unsigned long size,
@@ -641,9 +634,9 @@ static void *unpack_tree_v4(struct object_entry *obj,
add_tree_delta_base(obj, last_base, delta_start);
} else if (!last_base)
bad_object(offset,
- _("bad copy count index in unpack_tree_v4"));
+ _("missing delta base unpack_tree_v4"));
copy_count >>= 1;
- if (!copy_count)
+ if (!copy_count || copy_count > nr)
bad_object(offset,
_("bad copy count index in unpack_tree_v4"));
nr -= copy_count;
@@ -657,6 +650,13 @@ static void *unpack_tree_v4(struct object_entry *obj,
entry_sha1 = read_sha1ref();
nr--;
+ /*
+ * Attempt to rebuild a canonical (base) tree.
+ * If last_base is set, this tree depends on
+ * another tree, which we have no access to at this
+ * stage, so reconstruction must be delayed until
+ * the second pass.
+ */
if (!last_base) {
const unsigned char *path;
unsigned mode;
@@ -694,6 +694,11 @@ static void *unpack_tree_v4(struct object_entry *obj,
}
}
+/*
+ * Unpack the data of an entry in the streamed pack and calculate the
+ * object SHA-1 if it's not a large blob. Otherwise just try to inflate
+ * the object to /dev/null to determine the end of the entry in the pack.
+ */
static void *unpack_entry_data(struct object_entry *obj, unsigned char *sha1)
{
static char fixed_buf[8192];
@@ -799,19 +804,23 @@ static void *unpack_raw_entry(struct object_entry *obj,
return data;
}
+/*
+ * Some checks are skipped because they are already done by
+ * unpack_tree_v4() in the first pass.
+ */
static void *patch_one_base_tree(const struct object_entry *src,
const unsigned char *src_buf,
const unsigned char *delta_buf,
unsigned long delta_size,
unsigned long *dst_size)
{
- unsigned int nr;
+ int nr;
const unsigned char *last_base = NULL;
struct strbuf sb = STRBUF_INIT;
const unsigned char *p = delta_buf;
nr = decode_varint(&p);
- while (nr && p < delta_buf + delta_size) {
+ while (nr > 0 && p < delta_buf + delta_size) {
unsigned int copy_start_or_path = decode_varint(&p);
if (copy_start_or_path & 1) { /* copy_start */
struct tree_desc desc;
@@ -829,11 +838,9 @@ static void *patch_one_base_tree(const struct object_entry *src,
last_base = sha1_table + (id - 1) * 20;
if (hashcmp(last_base, src->idx.sha1))
die(_("bad tree base in patch_one_base_tree"));
- } else if (!last_base)
- die(_("bad copy count index in patch_one_base_tree"));
+ }
+
copy_count >>= 1;
- if (!copy_count)
- die(_("bad copy count index in patch_one_base_tree"));
nr -= copy_count;
init_tree_desc(&desc, src_buf, src->size);
@@ -841,7 +848,8 @@ static void *patch_one_base_tree(const struct object_entry *src,
if (copy_start)
copy_start--;
else if (copy_count) {
- strbuf_addf(&sb, "%o %s%c", entry.mode, entry.path, '\0');
+ strbuf_addf(&sb, "%o %s%c",
+ entry.mode, entry.path, '\0');
strbuf_add(&sb, entry.sha1, 20);
copy_count--;
} else
@@ -854,8 +862,6 @@ static void *patch_one_base_tree(const struct object_entry *src,
unsigned int id;
const unsigned char *entry_sha1;
- if (path_idx >= path_dict->nb_entries)
- die(_("bad path index in unpack_tree_v4"));
id = decode_varint(&p);
if (!id) {
entry_sha1 = p;
@@ -876,6 +882,11 @@ static void *patch_one_base_tree(const struct object_entry *src,
return sb.buf;
}
+/*
+ * Unpack entry data in the second pass, when the pack is already
+ * stored on disk. The consume callback is used for the large-blob
+ * case. Must be thread safe.
+ */
static void *unpack_data(struct object_entry *obj,
int (*consume)(const unsigned char *, unsigned long, void *),
void *cb_data)
@@ -1079,19 +1090,6 @@ static int check_collison(struct object_entry *entry)
return 0;
}
-static void check_against_sha1table(struct object_entry *obj)
-{
- const unsigned char *found;
- if (!packv4)
- return;
-
- found = bsearch(obj->idx.sha1, sha1_table, nr_objects, 20,
- (int (*)(const void *, const void *))hashcmp);
- if (!found)
- die(_("object %s not found in SHA-1 table"),
- sha1_to_hex(obj->idx.sha1));
-}
-
static void sha1_object(const void *data, struct object_entry *obj_entry,
unsigned long size, enum object_type type,
const unsigned char *sha1)
@@ -1288,7 +1286,7 @@ static void resolve_delta(struct object_entry *delta_obj,
bad_object(delta_obj->idx.offset, _("failed to apply delta"));
hash_sha1_file(result->data, result->size,
typename(delta_obj->real_type), delta_obj->idx.sha1);
- check_against_sha1table(delta_obj);
+ check_against_sha1table(delta_obj->idx.sha1);
sha1_object(result->data, NULL, result->size, delta_obj->real_type,
delta_obj->idx.sha1);
counter_lock();
@@ -1296,6 +1294,11 @@ static void resolve_delta(struct object_entry *delta_obj,
counter_unlock();
}
+/*
+ * Given a base object, search for all objects depending on the base
+ * and try to unpack one of those objects. The function will be called
+ * repeatedly until all objects are unpacked.
+ */
static struct base_data *find_unresolved_deltas_1(struct base_data *base,
struct base_data *prev_base)
{
@@ -1408,6 +1411,10 @@ static int compare_delta_entry(const void *a, const void *b)
objects[delta_b->obj_no].type);
}
+/*
+ * Unpack all objects depending directly or indirectly on the given
+ * object
+ */
static void resolve_base(struct object_entry *obj)
{
struct base_data *base_obj = alloc_base_data();
@@ -1417,6 +1424,7 @@ static void resolve_base(struct object_entry *obj)
}
#ifndef NO_PTHREADS
+/* Call resolve_base() in multiple threads */
static void *threaded_second_pass(void *data)
{
set_thread_data(data);
@@ -1460,10 +1468,19 @@ static struct packv4_dict *read_dict(void)
static void parse_dictionaries(void)
{
+ int i;
if (!packv4)
return;
- sha1_table = read_data(20 * nr_objects);
+ sha1_table = xmalloc(20 * nr_objects);
+ hashcpy(sha1_table, fill_and_use(20));
+ for (i = 1; i < nr_objects; i++) {
+ unsigned char *p = sha1_table + i * 20;
+ hashcpy(p, fill_and_use(20));
+ if (hashcmp(p - 20, p) >= 0)
+ die(_("wrong order in SHA-1 table at entry %d"), i);
+ }
+
name_dict = read_dict();
path_dict = read_dict();
}
@@ -1492,9 +1509,9 @@ static void parse_pack_objects(unsigned char *sha1)
/* large blobs, check later */
obj->real_type = OBJ_BAD;
nr_delays++;
- check_against_sha1table(obj);
+ check_against_sha1table(obj->idx.sha1);
} else {
- check_against_sha1table(obj);
+ check_against_sha1table(obj->idx.sha1);
sha1_object(data, NULL, obj->size, obj->real_type,
obj->idx.sha1);
}
@@ -2137,14 +2154,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
free(index_name_buf);
free(keep_name_buf);
free(sha1_table);
- if (name_dict) {
- free((void*)name_dict->data);
- free(name_dict);
- }
- if (path_dict) {
- free((void*)path_dict->data);
- free(path_dict);
- }
+ pv4_free_dict(name_dict);
+ pv4_free_dict(path_dict);
if (pack_name == NULL)
free((void *) curr_pack);
if (index_name == NULL)
diff --git a/packv4-parse.c b/packv4-parse.c
index 82661ba..d515bb9 100644
--- a/packv4-parse.c
+++ b/packv4-parse.c
@@ -63,6 +63,14 @@ struct packv4_dict *pv4_create_dict(const unsigned char *data, int dict_size)
return dict;
}
+void pv4_free_dict(struct packv4_dict *dict)
+{
+ if (dict) {
+ free((void*)dict->data);
+ free(dict);
+ }
+}
+
static struct packv4_dict *load_dict(struct packed_git *p, off_t *offset)
{
struct pack_window *w_curs = NULL;
diff --git a/packv4-parse.h b/packv4-parse.h
index 0b2405a..e6719f6 100644
--- a/packv4-parse.h
+++ b/packv4-parse.h
@@ -8,6 +8,7 @@ struct packv4_dict {
};
struct packv4_dict *pv4_create_dict(const unsigned char *data, int dict_size);
+void pv4_free_dict(struct packv4_dict *dict);
void *pv4_get_commit(struct packed_git *p, struct pack_window **w_curs,
off_t offset, unsigned long size);
diff --git a/sha1_file.c b/sha1_file.c
index c7bf677..1528e28 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -763,6 +763,8 @@ void free_pack_by_name(const char *pack_name)
}
close_pack_index(p);
free(p->bad_object_sha1);
+ pv4_free_dict(p->ident_dict);
+ pv4_free_dict(p->path_dict);
*pp = p->next;
if (last_found_pack == p)
last_found_pack = NULL;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html