It's very rare for an uncompressed object to be larger than 4GB
(partly because Git does not handle such large files very well to
begin with). Let's optimize for the common case where the object size
is smaller than this limit.

Shrink the size field down to 32 bits [1] plus one overflow bit. If the
size is too large to fit, we read it back from disk.

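The storage side could look roughly like the sketch below; oe_size()
and oe_set_size() are the helpers this patch actually uses, while the
field names and exact layout here are only illustrative:

    /* in struct object_entry (illustrative field names) */
    uint32_t size_;        /* object size, when it fits in 32 bits */
    unsigned size_valid:1; /* 0 means size_ could not hold the size */

    static inline void oe_set_size(struct object_entry *e,
                                   unsigned long size)
    {
        e->size_ = size;
        /* flag overflow if the value did not survive truncation */
        e->size_valid = e->size_ == size;
    }

    static inline unsigned long oe_size(const struct object_entry *e)
    {
        unsigned long size;

        if (e->size_valid)
            return e->size_;
        /* rare case: ask the object database for the real size */
        sha1_object_info(e->idx.oid.hash, &size);
        return size;
    }
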
Add two compare helpers that can take advantage of the overflow
bit (e.g. if the file is 4GB+, chances are it's already larger than
core.bigFileThreshold and there's no point in comparing the actual
value).

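Such a helper can answer "is this object larger than the limit?"
without going back to disk in the common case. A rough sketch
(contains_in_32bits() is an illustrative name, not part of the patch):

    static inline int contains_in_32bits(unsigned long limit)
    {
        return limit == (uint32_t)limit;
    }

    static inline int oe_size_greater_than(const struct object_entry *e,
                                           unsigned long limit)
    {
        if (e->size_valid)
            return e->size_ > limit;
        /*
         * The size overflowed 32 bits, so the object is certainly
         * larger than any limit that fits in 32 bits.
         */
        if (contains_in_32bits(limit))
            return 1;
        return oe_size(e) > limit;
    }
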
A small note about the conditional oe_set_size() in check_object():
technically, if we don't get a valid type, it's not wrong to store the
uninitialized "size" value anyway (we don't pre-initialize it, and
sha1_object_info() will not assign anything when it fails to get the
info).

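In other words, the fallback path only stores "size" when
sha1_object_info() actually filled it in, along the lines of this
sketch (not the literal check_object() code):

    if (sha1_object_info(entry->idx.oid.hash, &size) >= 0)
        oe_set_size(entry, size);
    /*
     * On failure sha1_object_info() leaves "size" untouched, so the
     * uninitialized value is never stored.
     */
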
This however changes the writing code path slightly, making it emit a
different error message (either way we die). One of our tests in t5530
depends on this specific error message. Let's just keep the test as-is
and play it safe by not assigning a garbage value, which might trigger
valgrind warnings anyway.

[1] it's actually already 32 bits on Windows
Signed-off-by: Nguyễn Thái Ngọc Duy
---
 builtin/pack-objects.c | 49 ++---
 pack-objects.h         | 50 +-
 2 files changed, 80 insertions(+), 19 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 71ca1ba2ce..887e12c556 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -274,7 +274,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
if (!usable_delta) {
if (oe_type(entry) == OBJ_BLOB &&
- entry->size > big_file_threshold &&
+ oe_size_greater_than(entry, big_file_threshold) &&
(st = open_istream(entry->idx.oid.hash, &type, &size,
NULL)) != NULL)
buf = NULL;
else {
@@ -384,12 +384,13 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
unsigned char header[MAX_PACK_OBJECT_HEADER],
dheader[MAX_PACK_OBJECT_HEADER];
unsigned hdrlen;
+ unsigned long entry_size = oe_size(entry);
if (DELTA(entry))
type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA;
hdrlen = encode_in_pack_object_header(header, sizeof(header),
- type, entry->size);
+ type, entry_size);
offset = entry->in_pack_offset;
revidx = find_pack_revindex(p, offset);
@@ -406,7 +407,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
datalen -= entry->in_pack_header_size;
if (!pack_to_stdout && p->index_version == 1 &&
- check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
+ check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
error("corrupt packed object for %s",
oid_to_hex(&entry->idx.oid));
unuse_pack(&w_curs);
@@ -1412,6 +1413,8 @@ static void cleanup_preferred_base(void)
static void check_object(struct object_entry *entry)
{
+ unsigned long size;
+
if (IN_PACK(entry)) {
struct packed_git *p = IN_PACK(entry);
struct pack_window *w_curs = NULL;
@@ -1431,13 +1434,14 @@ static void check_object(struct object_entry *entry)
*/
used = unpack_object_header_buffer(buf, avail,
&type,
- &entry->size);
+ &size);
if (used == 0)
goto give_up;
if (type < 0)
die("BUG: invalid type %d", type);
entry->in_pack_type = type;
+ oe_set_size(entry, size);
/*
* Determine if this is a delta and if so whether we can
@@ -1505,7 +1509,7 @@ static void check_object(struct object_entry *entry)
*/
oe_set_type(entry, entry->in_pack_type);
SET_DELTA(entry, base_entry);
- entry->delta_size = entry->size;
+ entry->delta_size = oe_size(entry);
entry->delta_sibling_idx = base_entry->delta_child_idx;
SET_DELTA_CHILD(base_entry, entry);
unuse_pack(&w_curs);
@@ -1513,14 +1517,17 @@ static void check_object(struct object_entry *entry)
}
if (oe_type(entry)) {
+ unsigned long size;
+
+ size = get_size_from_delta(p, &w_curs,
+