The final pair of chunks for the multi-pack-index file stores the object
offsets. We default to using 32-bit offsets as in the pack-index version
1 format, but if there exists an offset larger than 32-bits, we use a
trick similar to the pack-index version 2 format by storing all offsets
at least 2^31 in a 64-bit table; we use the 32-bit table to point into
that 64-bit table as necessary.
We only store these 64-bit offsets if necessary, so create a test that
manipulates a version 2 pack-index to fake a large offset. This allows
us to test that the large offset table is created, but the data does not
match the actual packfile offsets. The multi-pack-index offset does match
the (corrupted) pack-index offset, so a future feature will compare these
offsets during a 'verify' step.
Signed-off-by: Derrick Stolee
---
Documentation/technical/pack-format.txt | 15 +++-
midx.c | 100 +++-
object-store.h | 2 +
t/helper/test-read-midx.c | 4 +
t/t5319-multi-pack-index.sh | 44 ---
5 files changed, 150 insertions(+), 15 deletions(-)
diff --git a/Documentation/technical/pack-format.txt
b/Documentation/technical/pack-format.txt
index 3215f7bfcd..cab5bdd2ff 100644
--- a/Documentation/technical/pack-format.txt
+++ b/Documentation/technical/pack-format.txt
@@ -311,7 +311,20 @@ CHUNK DATA:
The OIDs for all objects in the MIDX are stored in lexicographic
order in this chunk.
- (This section intentionally left incomplete.)
+ Object Offsets (ID: {'O', 'O', 'F', 'F'})
+ Stores two 4-byte values for every object.
+ 1: The pack-int-id for the pack storing this object.
+ 2: The offset within the pack.
+ If all offsets are less than 2^31, then the large offset chunk
+ will not exist and offsets are stored as in IDX v1.
+ If there is at least one offset value larger than 2^32-1, then
+ the large offset chunk must exist. If the large offset chunk
+ exists and the 31st bit is on, then removing that bit reveals
+ the row in the large offsets containing the 8-byte offset of
+ this object.
+
+ [Optional] Object Large Offsets (ID: {'L', 'O', 'F', 'F'})
+ 8-byte offsets into large packfiles.
TRAILER:
diff --git a/midx.c b/midx.c
index 404147bb9f..cc35abe7a2 100644
--- a/midx.c
+++ b/midx.c
@@ -18,13 +18,18 @@
#define MIDX_HASH_LEN 20
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
-#define MIDX_MAX_CHUNKS 3
+#define MIDX_MAX_CHUNKS 5
#define MIDX_CHUNK_ALIGNMENT 4
#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
+#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
+#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
+#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
+#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
+#define MIDX_LARGE_OFFSET_NEEDED 0x8000
static char *get_midx_filename(const char *object_dir)
{
@@ -114,6 +119,14 @@ struct multi_pack_index *load_multi_pack_index(const char
*object_dir)
m->chunk_oid_lookup = m->data + chunk_offset;
break;
+ case MIDX_CHUNKID_OBJECTOFFSETS:
+ m->chunk_object_offsets = m->data +
chunk_offset;
+ break;
+
+ case MIDX_CHUNKID_LARGEOFFSETS:
+ m->chunk_large_offsets = m->data + chunk_offset;
+ break;
+
case 0:
die(_("terminating multi-pack-index chunk id
appears earlier than expected"));
break;
@@ -133,6 +146,8 @@ struct multi_pack_index *load_multi_pack_index(const char
*object_dir)
die(_("multi-pack-index missing required OID fanout chunk"));
if (!m->chunk_oid_lookup)
die(_("multi-pack-index missing required OID lookup chunk"));
+ if (!m->chunk_object_offsets)
+ die(_("multi-pack-index missing required object offsets
chunk"));
m->num_objects = ntohl(m->chunk_oid_fanout[255]);
@@ -456,6 +471,56 @@ static size_t write_midx_oid_lookup(struct hashfile *f,
unsigned char hash_len,
return written;
}
+static size_t write_midx_object_offsets(struct hashfile *f, int
large_offset_needed,
+ struct pack_midx_entry *objects,
uint32_t nr_objects)
+{
+ struct pack_midx_entry *list = objects;
+ uint32_t i, nr_large_offset = 0;
+ size_t written = 0;
+
+ for (i