Re: [PATCH v2 06/14] commit-graph: implement git-commit-graph --read

2018-02-05 Thread Derrick Stolee

On 2/1/2018 7:23 PM, Jonathan Tan wrote:

On Tue, 30 Jan 2018 16:39:35 -0500
Derrick Stolee  wrote:


Teach git-commit-graph to read commit graph files and summarize their contents.

One overall question - is the "read" command meant to be a command used
by the end user, or is it here just to test that some aspects of reading
works? If the former, I'm not sure how useful it is. And if the latter,
I think that it is more useful to just implementing something that reads
it, then make the 11/14 change (modifying parse_commit_gently) and
include a perf test to show that your commit graph reading is both
correct and (performance-)effective.


The "read" command is intended for use with the tests to verify that the 
different --write commands write the correct number of commits at a 
time. For example, we can verify that the closure under reachability 
works when using --stdin-commits, that we get every commit in a pack 
when using --stdin-packs, and that we don't get more commits than we should.


It doesn't serve much purpose on the user-facing side, but this is 
intended to be a plumbing command that is called by other porcelain 
commands.


Re: [PATCH v2 06/14] commit-graph: implement git-commit-graph --read

2018-02-01 Thread Jonathan Tan
On Tue, 30 Jan 2018 16:39:35 -0500
Derrick Stolee  wrote:

> Teach git-commit-graph to read commit graph files and summarize their 
> contents.

One overall question - is the "read" command meant to be a command used
by the end user, or is it here just to test that some aspects of reading
works? If the former, I'm not sure how useful it is. And if the latter,
I think that it is more useful to just implementing something that reads
it, then make the 11/14 change (modifying parse_commit_gently) and
include a perf test to show that your commit graph reading is both
correct and (performance-)effective.


Re: [PATCH v2 06/14] commit-graph: implement git-commit-graph --read

2018-02-01 Thread SZEDER Gábor

> Teach git-commit-graph to read commit graph files and summarize their 
> contents.
> 
> Use the --read option to verify the contents of a commit graph file in the
> tests.
> 
> Signed-off-by: Derrick Stolee 
> ---
>  Documentation/git-commit-graph.txt |   7 ++
>  builtin/commit-graph.c |  55 +++
>  commit-graph.c | 138 
> -
>  commit-graph.h |  25 +++
>  t/t5318-commit-graph.sh|  28 ++--
>  5 files changed, 247 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/git-commit-graph.txt 
> b/Documentation/git-commit-graph.txt
> index 3f3790d9a8..09aeaf6c82 100644
> --- a/Documentation/git-commit-graph.txt
> +++ b/Documentation/git-commit-graph.txt
> @@ -10,6 +10,7 @@ SYNOPSIS
>  
>  [verse]
>  'git commit-graph' --write  [--pack-dir ]
> +'git commit-graph' --read  [--pack-dir ]

Again, what does this option do?

>  EXAMPLES
>  
> @@ -20,6 +21,12 @@ EXAMPLES
>  $ git commit-graph --write
>  
>  
> +* Read basic information from a graph file.
> ++
> +
> +$ git commit-graph --read --graph-hash=
> +
> +
>  GIT
>  ---
>  Part of the linkgit:git[1] suite
> diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
> index 7affd512f1..218740b1f8 100644
> --- a/builtin/commit-graph.c
> +++ b/builtin/commit-graph.c

> +int close_commit_graph(struct commit_graph *g)

static, perhaps?  I see it's declared as extern in the headeer file
below, but I don't see it called outside of this source file by the
end of the patch series.

> +{
> + if (g->graph_fd < 0)
> + return 0;
> +
> + munmap((void *)g->data, g->data_len);
> + g->data = 0;
> +
> + close(g->graph_fd);
> + g->graph_fd = -1;
> +
> + return 1;
> +}
> +
> +static void free_commit_graph(struct commit_graph **g)
> +{
> + if (!g || !*g)
> + return;
> +
> + close_commit_graph(*g);
> +
> + free(*g);
> + *g = NULL;
> +}
> +
> +struct commit_graph *load_commit_graph_one(const char *graph_file, const 
> char *pack_dir)
> +{
> + void *graph_map;
> + const unsigned char *data;
> + struct commit_graph_header *hdr;
> + size_t graph_size;
> + struct stat st;
> + uint32_t i;
> + struct commit_graph *graph;
> + int fd = git_open(graph_file);
> + uint64_t last_chunk_offset;
> + uint32_t last_chunk_id;
> +
> + if (fd < 0)
> + return 0;
> + if (fstat(fd, )) {
> + close(fd);
> + return 0;
> + }
> + graph_size = xsize_t(st.st_size);
> +
> + if (graph_size < GRAPH_MIN_SIZE) {
> + close(fd);
> + die("graph file %s is too small", graph_file);
> + }
> + graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
> + data = (const unsigned char *)graph_map;
> +
> + hdr = graph_map;
> + if (ntohl(hdr->graph_signature) != GRAPH_SIGNATURE) {
> + uint32_t signature = ntohl(hdr->graph_signature);
> + munmap(graph_map, graph_size);
> + close(fd);
> + die("graph signature %X does not match signature %X",
> + signature, GRAPH_SIGNATURE);
> + }
> + if (hdr->graph_version != GRAPH_VERSION) {
> + unsigned char version = hdr->graph_version;
> + munmap(graph_map, graph_size);
> + close(fd);
> + die("graph version %X does not match version %X",
> + version, GRAPH_VERSION);
> + }
> +
> + graph = alloc_commit_graph(strlen(pack_dir) + 1);
> +
> + graph->hdr = hdr;
> + graph->graph_fd = fd;
> + graph->data = graph_map;
> + graph->data_len = graph_size;
> +
> + last_chunk_id = 0;
> + last_chunk_offset = (uint64_t)sizeof(*hdr);
> + for (i = 0; i < hdr->num_chunks; i++) {
> + uint32_t chunk_id = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 
> * i));
> + uint64_t chunk_offset1 = ntohl(*(uint32_t*)(data + sizeof(*hdr) 
> + 12 * i + 4));
> + uint32_t chunk_offset2 = ntohl(*(uint32_t*)(data + sizeof(*hdr) 
> + 12 * i + 8));

There are a lot of magic number in these three lines, but at least
they are all multiples of 4.

> + uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2;
> +
> + if (chunk_offset > graph_size - GIT_MAX_RAWSZ)
> + die("improper chunk offset %08x%08x", 
> (uint32_t)(chunk_offset >> 32),
> + (uint32_t)chunk_offset);
> +
> + switch (chunk_id) {
> + case GRAPH_CHUNKID_OIDFANOUT:
> + graph->chunk_oid_fanout = data + chunk_offset;
> + break;
> +
> + case GRAPH_CHUNKID_OIDLOOKUP:
> +   

Re: [PATCH v2 06/14] commit-graph: implement git-commit-graph --read

2018-01-30 Thread Stefan Beller
> +static void free_commit_graph(struct commit_graph **g)
> +{
> +   if (!g || !*g)
> +   return;
> +
> +   close_commit_graph(*g);
> +
> +   free(*g);
> +   *g = NULL;

nit: You may want to use FREE_AND_NULL(*g) instead.


[PATCH v2 06/14] commit-graph: implement git-commit-graph --read

2018-01-30 Thread Derrick Stolee
Teach git-commit-graph to read commit graph files and summarize their contents.

Use the --read option to verify the contents of a commit graph file in the
tests.

Signed-off-by: Derrick Stolee 
---
 Documentation/git-commit-graph.txt |   7 ++
 builtin/commit-graph.c |  55 +++
 commit-graph.c | 138 -
 commit-graph.h |  25 +++
 t/t5318-commit-graph.sh|  28 ++--
 5 files changed, 247 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-commit-graph.txt 
b/Documentation/git-commit-graph.txt
index 3f3790d9a8..09aeaf6c82 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -10,6 +10,7 @@ SYNOPSIS
 
 [verse]
 'git commit-graph' --write  [--pack-dir ]
+'git commit-graph' --read  [--pack-dir ]
 
 EXAMPLES
 
@@ -20,6 +21,12 @@ EXAMPLES
 $ git commit-graph --write
 
 
+* Read basic information from a graph file.
++
+
+$ git commit-graph --read --graph-hash=
+
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 7affd512f1..218740b1f8 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -10,15 +10,58 @@
 
 static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph [--pack-dir ]"),
+   N_("git commit-graph --read [--graph-hash=]"),
N_("git commit-graph --write [--pack-dir ]"),
NULL
 };
 
 static struct opts_commit_graph {
const char *pack_dir;
+   int read;
+   const char *graph_hash;
int write;
 } opts;
 
+static int graph_read(void)
+{
+   struct object_id graph_hash;
+   struct commit_graph *graph = 0;
+   const char *graph_file;
+
+   if (opts.graph_hash && strlen(opts.graph_hash) == GIT_MAX_HEXSZ)
+   get_oid_hex(opts.graph_hash, _hash);
+   else
+   die("no graph hash specified");
+
+   graph_file = get_commit_graph_filename_hash(opts.pack_dir, _hash);
+   graph = load_commit_graph_one(graph_file, opts.pack_dir);
+
+   if (!graph)
+   die("graph file %s does not exist", graph_file);
+
+   printf("header: %08x %02x %02x %02x %02x\n",
+   ntohl(graph->hdr->graph_signature),
+   graph->hdr->graph_version,
+   graph->hdr->hash_version,
+   graph->hdr->hash_len,
+   graph->hdr->num_chunks);
+   printf("num_commits: %u\n", graph->num_commits);
+   printf("chunks:");
+
+   if (graph->chunk_oid_fanout)
+   printf(" oid_fanout");
+   if (graph->chunk_oid_lookup)
+   printf(" oid_lookup");
+   if (graph->chunk_commit_data)
+   printf(" commit_metadata");
+   if (graph->chunk_large_edges)
+   printf(" large_edges");
+   printf("\n");
+
+   printf("pack_dir: %s\n", graph->pack_dir);
+   return 0;
+}
+
 static int graph_write(void)
 {
struct object_id *graph_hash = construct_commit_graph(opts.pack_dir);
@@ -36,8 +79,14 @@ int cmd_commit_graph(int argc, const char **argv, const char 
*prefix)
{ OPTION_STRING, 'p', "pack-dir", _dir,
N_("dir"),
N_("The pack directory to store the graph") },
+   OPT_BOOL('r', "read", ,
+   N_("read graph file")),
OPT_BOOL('w', "write", ,
N_("write commit graph file")),
+   { OPTION_STRING, 'H', "graph-hash", _hash,
+   N_("hash"),
+   N_("A hash for a specific graph file in the pack-dir."),
+   PARSE_OPT_OPTARG, NULL, (intptr_t) "" },
OPT_END(),
};
 
@@ -49,6 +98,10 @@ int cmd_commit_graph(int argc, const char **argv, const char 
*prefix)
 builtin_commit_graph_options,
 builtin_commit_graph_usage, 0);
 
+   if (opts.write + opts.read > 1)
+   usage_with_options(builtin_commit_graph_usage,
+  builtin_commit_graph_options);
+
if (!opts.pack_dir) {
struct strbuf path = STRBUF_INIT;
strbuf_addstr(, get_object_directory());
@@ -56,6 +109,8 @@ int cmd_commit_graph(int argc, const char **argv, const char 
*prefix)
opts.pack_dir = strbuf_detach(, NULL);
}
 
+   if (opts.read)
+   return graph_read();
if (opts.write)
return graph_write();
 
diff --git a/commit-graph.c b/commit-graph.c
index db2b7390c7..622a650259 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -48,6 +48,142 @@ char* get_commit_graph_filename_hash(const char *pack_dir,
return