Re: [RFC PATCH 04/10] pack: move open_pack_index(), parse_pack_index()
On Tue, 08 Aug 2017 13:19:23 -0700 Junio C Hamano wrote: > Jonathan Tan writes: > > > Signed-off-by: Jonathan Tan > > --- > > builtin/count-objects.c | 1 + > > cache.h | 8 --- > > pack.c | 149 > > > > pack.h | 8 +++ > > sha1_file.c | 140 - > > sha1_name.c | 1 + > > 6 files changed, 159 insertions(+), 148 deletions(-) > > This patch is a bit strange... > > > diff --git a/pack.c b/pack.c > > index 60d9fc3b0..6edc43228 100644 > > --- a/pack.c > > +++ b/pack.c > > ... > > +static struct packed_git *alloc_packed_git(int extra) > > +{ > > + struct packed_git *p = xmalloc(st_add(sizeof(*p), extra)); > > + memset(p, 0, sizeof(*p)); > > + p->pack_fd = -1; > > + return p; > > +} > > + > > +struct packed_git *parse_pack_index(unsigned char *sha1, const char > > *idx_path) > > +{ > > + const char *path = sha1_pack_name(sha1); > > + size_t alloc = st_add(strlen(path), 1); > > + struct packed_git *p = alloc_packed_git(alloc); > > + > > + memcpy(p->pack_name, path, alloc); /* includes NUL */ > > + hashcpy(p->sha1, sha1); > > + if (check_packed_git_idx(idx_path, p)) { > > + free(p); > > + return NULL; > > + } > > + > > + return p; > > +} > > We see these two functions appear in pack.c > > > diff --git a/sha1_file.c b/sha1_file.c > > index 0de39f480..2e414f5f5 100644 > > --- a/sha1_file.c > > +++ b/sha1_file.c > > ... > > @@ -1300,22 +1176,6 @@ struct packed_git *add_packed_git(const char *path, > > size_t path_len, int local) > > return p; > > } > > > > -struct packed_git *parse_pack_index(unsigned char *sha1, const char > > *idx_path) > > -{ > > - const char *path = sha1_pack_name(sha1); > > - size_t alloc = st_add(strlen(path), 1); > > - struct packed_git *p = alloc_packed_git(alloc); > > - > > - memcpy(p->pack_name, path, alloc); /* includes NUL */ > > - hashcpy(p->sha1, sha1); > > - if (check_packed_git_idx(idx_path, p)) { > > - free(p); > > - return NULL; > > - } > > - > > - return p; > > -} > > - > > And we see parse_pack_index() came from sha1_file.c > > But where did alloc_packed_git() come from? Was the patch split > incorrectly or something? > > When I applied the whole series and did > > git blame -s -w -M -C -C master.. pack.c > > expecting that pretty much everything has come from sha1_file.c but > noticed that some lines were actually blamed to a version of pack.c > and these functions were among them. alloc_packed_git() in pack.c is a duplicate of the function of the same name in sha1_file.c in this patch, because at this patch, there are still functions in both files using this function. A subsequent patch in this patch set will remove it from pack.c. I'll add a note explaining this to this patch in the next version.
Re: [RFC PATCH 04/10] pack: move open_pack_index(), parse_pack_index()
Jonathan Tan writes: > Signed-off-by: Jonathan Tan > --- > builtin/count-objects.c | 1 + > cache.h | 8 --- > pack.c | 149 > > pack.h | 8 +++ > sha1_file.c | 140 - > sha1_name.c | 1 + > 6 files changed, 159 insertions(+), 148 deletions(-) This patch is a bit strange... > diff --git a/pack.c b/pack.c > index 60d9fc3b0..6edc43228 100644 > --- a/pack.c > +++ b/pack.c > ... > +static struct packed_git *alloc_packed_git(int extra) > +{ > + struct packed_git *p = xmalloc(st_add(sizeof(*p), extra)); > + memset(p, 0, sizeof(*p)); > + p->pack_fd = -1; > + return p; > +} > + > +struct packed_git *parse_pack_index(unsigned char *sha1, const char > *idx_path) > +{ > + const char *path = sha1_pack_name(sha1); > + size_t alloc = st_add(strlen(path), 1); > + struct packed_git *p = alloc_packed_git(alloc); > + > + memcpy(p->pack_name, path, alloc); /* includes NUL */ > + hashcpy(p->sha1, sha1); > + if (check_packed_git_idx(idx_path, p)) { > + free(p); > + return NULL; > + } > + > + return p; > +} We see these two functions appear in pack.c > diff --git a/sha1_file.c b/sha1_file.c > index 0de39f480..2e414f5f5 100644 > --- a/sha1_file.c > +++ b/sha1_file.c > ... > @@ -1300,22 +1176,6 @@ struct packed_git *add_packed_git(const char *path, > size_t path_len, int local) > return p; > } > > -struct packed_git *parse_pack_index(unsigned char *sha1, const char > *idx_path) > -{ > - const char *path = sha1_pack_name(sha1); > - size_t alloc = st_add(strlen(path), 1); > - struct packed_git *p = alloc_packed_git(alloc); > - > - memcpy(p->pack_name, path, alloc); /* includes NUL */ > - hashcpy(p->sha1, sha1); > - if (check_packed_git_idx(idx_path, p)) { > - free(p); > - return NULL; > - } > - > - return p; > -} > - And we see parse_pack_index() came from sha1_file.c But where did alloc_packed_git() come from? Was the patch split incorrectly or something? When I applied the whole series and did git blame -s -w -M -C -C master.. pack.c expecting that pretty much everything has come from sha1_file.c but noticed that some lines were actually blamed to a version of pack.c and these functions were among them.
[RFC PATCH 04/10] pack: move open_pack_index(), parse_pack_index()
Signed-off-by: Jonathan Tan --- builtin/count-objects.c | 1 + cache.h | 8 --- pack.c | 149 pack.h | 8 +++ sha1_file.c | 140 - sha1_name.c | 1 + 6 files changed, 159 insertions(+), 148 deletions(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 1d82e61f2..185d3190a 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -10,6 +10,7 @@ #include "builtin.h" #include "parse-options.h" #include "quote.h" +#include "pack.h" static unsigned long garbage; static off_t size_garbage; diff --git a/cache.h b/cache.h index c7f802e4a..5d6839525 100644 --- a/cache.h +++ b/cache.h @@ -1603,8 +1603,6 @@ struct pack_entry { struct packed_git *p; }; -extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); - /* A hook to report invalid files in pack directory */ #define PACKDIR_FILE_PACK 1 #define PACKDIR_FILE_IDX 2 @@ -1639,12 +1637,6 @@ extern int odb_mkstemp(struct strbuf *template, const char *pattern); */ extern int odb_pack_keep(const char *name); -/* - * mmap the index file for the specified packfile (if it is not - * already mmapped). Return 0 on success. - */ -extern int open_pack_index(struct packed_git *); - /* * munmap the index file for the specified packfile (if it is * currently mmapped). diff --git a/pack.c b/pack.c index 60d9fc3b0..6edc43228 100644 --- a/pack.c +++ b/pack.c @@ -1,5 +1,6 @@ #include "cache.h" #include "mru.h" +#include "pack.h" char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, @@ -59,3 +60,151 @@ void pack_report(void) pack_open_windows, peak_pack_open_windows, sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped)); } + +/* + * Open and mmap the index file at path, perform a couple of + * consistency checks, then record its information to p. Return 0 on + * success. + */ +static int check_packed_git_idx(const char *path, struct packed_git *p) +{ + void *idx_map; + struct pack_idx_header *hdr; + size_t idx_size; + uint32_t version, nr, i, *index; + int fd = git_open(path); + struct stat st; + + if (fd < 0) + return -1; + if (fstat(fd, &st)) { + close(fd); + return -1; + } + idx_size = xsize_t(st.st_size); + if (idx_size < 4 * 256 + 20 + 20) { + close(fd); + return error("index file %s is too small", path); + } + idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + hdr = idx_map; + if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { + version = ntohl(hdr->idx_version); + if (version < 2 || version > 2) { + munmap(idx_map, idx_size); + return error("index file %s is version %"PRIu32 +" and is not supported by this binary" +" (try upgrading GIT to a newer version)", +path, version); + } + } else + version = 1; + + nr = 0; + index = idx_map; + if (version > 1) + index += 2; /* skip index header */ + for (i = 0; i < 256; i++) { + uint32_t n = ntohl(index[i]); + if (n < nr) { + munmap(idx_map, idx_size); + return error("non-monotonic index %s", path); + } + nr = n; + } + + if (version == 1) { + /* +* Total size: +* - 256 index entries 4 bytes each +* - 24-byte entries * nr (20-byte sha1 + 4-byte offset) +* - 20-byte SHA1 of the packfile +* - 20-byte SHA1 file checksum +*/ + if (idx_size != 4*256 + nr * 24 + 20 + 20) { + munmap(idx_map, idx_size); + return error("wrong index v1 file size in %s", path); + } + } else if (version == 2) { + /* +* Minimum size: +* - 8 bytes of header +* - 256 index entries 4 bytes each +* - 20-byte sha1 entry * nr +* - 4-byte crc entry * nr +* - 4-byte offset entry * nr +* - 20-byte SHA1 of the packfile +* - 20-byte SHA1 file checksum +* And after the 4-byte offset table might be a +* variable sized table containing 8-byte entries +* for offsets larger than 2^31. +*/ + unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20; + unsigned long max