Re: [RFC PATCH 04/10] pack: move open_pack_index(), parse_pack_index()

2017-08-08 Thread Jonathan Tan
On Tue, 08 Aug 2017 13:19:23 -0700
Junio C Hamano  wrote:

> Jonathan Tan  writes:
> 
> > Signed-off-by: Jonathan Tan 
> > ---
> >  builtin/count-objects.c |   1 +
> >  cache.h |   8 ---
> >  pack.c  | 149 
> > 
> >  pack.h  |   8 +++
> >  sha1_file.c | 140 -
> >  sha1_name.c |   1 +
> >  6 files changed, 159 insertions(+), 148 deletions(-)
> 
> This patch is a bit strange...
> 
> > diff --git a/pack.c b/pack.c
> > index 60d9fc3b0..6edc43228 100644
> > --- a/pack.c
> > +++ b/pack.c
> > ...
> > +static struct packed_git *alloc_packed_git(int extra)
> > +{
> > +   struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
> > +   memset(p, 0, sizeof(*p));
> > +   p->pack_fd = -1;
> > +   return p;
> > +}
> > +
> > +struct packed_git *parse_pack_index(unsigned char *sha1, const char 
> > *idx_path)
> > +{
> > +   const char *path = sha1_pack_name(sha1);
> > +   size_t alloc = st_add(strlen(path), 1);
> > +   struct packed_git *p = alloc_packed_git(alloc);
> > +
> > +   memcpy(p->pack_name, path, alloc); /* includes NUL */
> > +   hashcpy(p->sha1, sha1);
> > +   if (check_packed_git_idx(idx_path, p)) {
> > +   free(p);
> > +   return NULL;
> > +   }
> > +
> > +   return p;
> > +}
> 
> We see these two functions appear in pack.c
> 
> > diff --git a/sha1_file.c b/sha1_file.c
> > index 0de39f480..2e414f5f5 100644
> > --- a/sha1_file.c
> > +++ b/sha1_file.c
> > ...
> > @@ -1300,22 +1176,6 @@ struct packed_git *add_packed_git(const char *path, 
> > size_t path_len, int local)
> > return p;
> >  }
> >  
> > -struct packed_git *parse_pack_index(unsigned char *sha1, const char 
> > *idx_path)
> > -{
> > -   const char *path = sha1_pack_name(sha1);
> > -   size_t alloc = st_add(strlen(path), 1);
> > -   struct packed_git *p = alloc_packed_git(alloc);
> > -
> > -   memcpy(p->pack_name, path, alloc); /* includes NUL */
> > -   hashcpy(p->sha1, sha1);
> > -   if (check_packed_git_idx(idx_path, p)) {
> > -   free(p);
> > -   return NULL;
> > -   }
> > -
> > -   return p;
> > -}
> > -
> 
> And we see parse_pack_index() came from sha1_file.c
> 
> But where did alloc_packed_git() come from?  Was the patch split
> incorrectly or something?
> 
> When I applied the whole series and did
> 
> git blame -s -w -M -C -C master.. pack.c
> 
> expecting that pretty much everything has come from sha1_file.c but
> noticed that some lines were actually blamed to a version of pack.c
> and these functions were among them.

alloc_packed_git() in pack.c is a duplicate of the function of the same
name in sha1_file.c in this patch, because at this patch, there are
still functions in both files using this function. A subsequent patch in
this patch set will remove it from pack.c.

I'll add a note explaining this to this patch in the next version.


Re: [RFC PATCH 04/10] pack: move open_pack_index(), parse_pack_index()

2017-08-08 Thread Junio C Hamano
Jonathan Tan  writes:

> Signed-off-by: Jonathan Tan 
> ---
>  builtin/count-objects.c |   1 +
>  cache.h |   8 ---
>  pack.c  | 149 
> 
>  pack.h  |   8 +++
>  sha1_file.c | 140 -
>  sha1_name.c |   1 +
>  6 files changed, 159 insertions(+), 148 deletions(-)

This patch is a bit strange...

> diff --git a/pack.c b/pack.c
> index 60d9fc3b0..6edc43228 100644
> --- a/pack.c
> +++ b/pack.c
> ...
> +static struct packed_git *alloc_packed_git(int extra)
> +{
> + struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
> + memset(p, 0, sizeof(*p));
> + p->pack_fd = -1;
> + return p;
> +}
> +
> +struct packed_git *parse_pack_index(unsigned char *sha1, const char 
> *idx_path)
> +{
> + const char *path = sha1_pack_name(sha1);
> + size_t alloc = st_add(strlen(path), 1);
> + struct packed_git *p = alloc_packed_git(alloc);
> +
> + memcpy(p->pack_name, path, alloc); /* includes NUL */
> + hashcpy(p->sha1, sha1);
> + if (check_packed_git_idx(idx_path, p)) {
> + free(p);
> + return NULL;
> + }
> +
> + return p;
> +}

We see these two functions appear in pack.c

> diff --git a/sha1_file.c b/sha1_file.c
> index 0de39f480..2e414f5f5 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> ...
> @@ -1300,22 +1176,6 @@ struct packed_git *add_packed_git(const char *path, 
> size_t path_len, int local)
>   return p;
>  }
>  
> -struct packed_git *parse_pack_index(unsigned char *sha1, const char 
> *idx_path)
> -{
> - const char *path = sha1_pack_name(sha1);
> - size_t alloc = st_add(strlen(path), 1);
> - struct packed_git *p = alloc_packed_git(alloc);
> -
> - memcpy(p->pack_name, path, alloc); /* includes NUL */
> - hashcpy(p->sha1, sha1);
> - if (check_packed_git_idx(idx_path, p)) {
> - free(p);
> - return NULL;
> - }
> -
> - return p;
> -}
> -

And we see parse_pack_index() came from sha1_file.c

But where did alloc_packed_git() come from?  Was the patch split
incorrectly or something?

When I applied the whole series and did

git blame -s -w -M -C -C master.. pack.c

expecting that pretty much everything has come from sha1_file.c but
noticed that some lines were actually blamed to a version of pack.c
and these functions were among them.


[RFC PATCH 04/10] pack: move open_pack_index(), parse_pack_index()

2017-08-08 Thread Jonathan Tan
Signed-off-by: Jonathan Tan 
---
 builtin/count-objects.c |   1 +
 cache.h |   8 ---
 pack.c  | 149 
 pack.h  |   8 +++
 sha1_file.c | 140 -
 sha1_name.c |   1 +
 6 files changed, 159 insertions(+), 148 deletions(-)

diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 1d82e61f2..185d3190a 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -10,6 +10,7 @@
 #include "builtin.h"
 #include "parse-options.h"
 #include "quote.h"
+#include "pack.h"
 
 static unsigned long garbage;
 static off_t size_garbage;
diff --git a/cache.h b/cache.h
index c7f802e4a..5d6839525 100644
--- a/cache.h
+++ b/cache.h
@@ -1603,8 +1603,6 @@ struct pack_entry {
struct packed_git *p;
 };
 
-extern struct packed_git *parse_pack_index(unsigned char *sha1, const char 
*idx_path);
-
 /* A hook to report invalid files in pack directory */
 #define PACKDIR_FILE_PACK 1
 #define PACKDIR_FILE_IDX 2
@@ -1639,12 +1637,6 @@ extern int odb_mkstemp(struct strbuf *template, const 
char *pattern);
  */
 extern int odb_pack_keep(const char *name);
 
-/*
- * mmap the index file for the specified packfile (if it is not
- * already mmapped).  Return 0 on success.
- */
-extern int open_pack_index(struct packed_git *);
-
 /*
  * munmap the index file for the specified packfile (if it is
  * currently mmapped).
diff --git a/pack.c b/pack.c
index 60d9fc3b0..6edc43228 100644
--- a/pack.c
+++ b/pack.c
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "mru.h"
+#include "pack.h"
 
 char *odb_pack_name(struct strbuf *buf,
const unsigned char *sha1,
@@ -59,3 +60,151 @@ void pack_report(void)
pack_open_windows, peak_pack_open_windows,
sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
 }
+
+/*
+ * Open and mmap the index file at path, perform a couple of
+ * consistency checks, then record its information to p.  Return 0 on
+ * success.
+ */
+static int check_packed_git_idx(const char *path, struct packed_git *p)
+{
+   void *idx_map;
+   struct pack_idx_header *hdr;
+   size_t idx_size;
+   uint32_t version, nr, i, *index;
+   int fd = git_open(path);
+   struct stat st;
+
+   if (fd < 0)
+   return -1;
+   if (fstat(fd, &st)) {
+   close(fd);
+   return -1;
+   }
+   idx_size = xsize_t(st.st_size);
+   if (idx_size < 4 * 256 + 20 + 20) {
+   close(fd);
+   return error("index file %s is too small", path);
+   }
+   idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
+   close(fd);
+
+   hdr = idx_map;
+   if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
+   version = ntohl(hdr->idx_version);
+   if (version < 2 || version > 2) {
+   munmap(idx_map, idx_size);
+   return error("index file %s is version %"PRIu32
+" and is not supported by this binary"
+" (try upgrading GIT to a newer version)",
+path, version);
+   }
+   } else
+   version = 1;
+
+   nr = 0;
+   index = idx_map;
+   if (version > 1)
+   index += 2;  /* skip index header */
+   for (i = 0; i < 256; i++) {
+   uint32_t n = ntohl(index[i]);
+   if (n < nr) {
+   munmap(idx_map, idx_size);
+   return error("non-monotonic index %s", path);
+   }
+   nr = n;
+   }
+
+   if (version == 1) {
+   /*
+* Total size:
+*  - 256 index entries 4 bytes each
+*  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
+*  - 20-byte SHA1 of the packfile
+*  - 20-byte SHA1 file checksum
+*/
+   if (idx_size != 4*256 + nr * 24 + 20 + 20) {
+   munmap(idx_map, idx_size);
+   return error("wrong index v1 file size in %s", path);
+   }
+   } else if (version == 2) {
+   /*
+* Minimum size:
+*  - 8 bytes of header
+*  - 256 index entries 4 bytes each
+*  - 20-byte sha1 entry * nr
+*  - 4-byte crc entry * nr
+*  - 4-byte offset entry * nr
+*  - 20-byte SHA1 of the packfile
+*  - 20-byte SHA1 file checksum
+* And after the 4-byte offset table might be a
+* variable sized table containing 8-byte entries
+* for offsets larger than 2^31.
+*/
+   unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
+   unsigned long max