Re: [PATCH v3 8/9] commit-graph: always load commit-graph information

2018-04-23 Thread Derrick Stolee

On 4/18/2018 8:02 PM, Jakub Narebski wrote:

Derrick Stolee  writes:


Most code paths load commits using lookup_commit() and then
parse_commit(). In some cases, including some branch lookups, the commit
is parsed using parse_object_buffer() which side-steps parse_commit() in
favor of parse_commit_buffer().

With generation numbers in the commit-graph, we need to ensure that any
commit that exists in the commit-graph file has its generation number
loaded.

All right, that is a nice explanation of the why behind this change.


Create new load_commit_graph_info() method to fill in the information
for a commit that exists only in the commit-graph file. Call it from
parse_commit_buffer() after loading the other commit information from
the given buffer. Only fill this information when specified by the
'check_graph' parameter. This avoids duplicate work when we already
checked the graph in parse_commit_gently() or when simply checking the
buffer contents in check_commit().

Couldn't this 'check_graph' parameter be a global variable similar to
the 'commit_graph' variable?  Maybe I am not understanding it.


See the two callers at the bottom of the patch. They have different 
purposes: one needs to fill in a valid commit struct, the other needs to 
check the commit buffer is valid (then throws away the struct). They 
have different values for 'check_graph'. Also, in parse_commit_gently() 
we check parse_commit_in_graph() before we call parse_commit_buffer, so 
we do not want to repeat work; in the case of a valid commit-graph file, 
but the commit is not in the commit-graph, we would repeat our binary 
search for the same commit.





Signed-off-by: Derrick Stolee 
---
  commit-graph.c | 51 --
  commit-graph.h |  8 
  commit.c   |  7 +--
  commit.h   |  2 +-
  object.c   |  2 +-
  sha1_file.c|  2 +-
  6 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 688d5b1801..21e853c21a 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -245,13 +245,19 @@ static struct commit_list **insert_parent_or_die(struct 
commit_graph *g,
return &commit_list_insert(c, pptr)->next;
  }
  
+static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)

+{
+   const unsigned char *commit_data = g->chunk_commit_data + 
GRAPH_DATA_WIDTH * pos;
+   item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
+}
+
  static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, 
uint32_t pos)
  {
uint32_t edge_value;
uint32_t *parent_data_ptr;
uint64_t date_low, date_high;
struct commit_list **pptr;
-   const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len 
+ 16) * pos;
+   const unsigned char *commit_data = g->chunk_commit_data + 
GRAPH_DATA_WIDTH * pos;

I'm probably wrong, but isn't it an unrelated change?


You're right. I saw this while I was in here, and there was a similar 
comment on this change in a different patch. Probably best to keep these 
cleanup things in a separate commit.



item->object.parsed = 1;
item->graph_pos = pos;
@@ -292,31 +298,40 @@ static int fill_commit_in_graph(struct commit *item, 
struct commit_graph *g, uin
return 1;
  }
  
+static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)

+{
+   if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
+   *pos = item->graph_pos;
+   return 1;
+   } else {
+   return bsearch_graph(commit_graph, &(item->object.oid), pos);
+   }
+}

All right (after the fix).


+
  int parse_commit_in_graph(struct commit *item)
  {
+   uint32_t pos;
+
+   if (item->object.parsed)
+   return 0;
if (!core_commit_graph)
return 0;
-   if (item->object.parsed)
-   return 1;

Hmmm... previously the function returned 1 if item->object.parsed, now
it returns 0 for this situation.  I don't understand this change.


The good news is that this change is unimportant (the only caller is 
parse_commit_gently() which checks item->object.parsed before calling 
parse_commit_in_graph()). I wonder why I reordered those things, anyway. 
I'll revert to simplify the patch.





-
prepare_commit_graph();
-   if (commit_graph) {
-   uint32_t pos;
-   int found;
-   if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
-   pos = item->graph_pos;
-   found = 1;
-   } else {
-   found = bsearch_graph(commit_graph, &(item->object.oid), 
&pos);
-   }
-
-   if (found)
-   return fill_commit_in_graph(item, commit_graph, pos);
-   }
-
+   if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
+   

Re: [PATCH v3 8/9] commit-graph: always load commit-graph information

2018-04-18 Thread Jakub Narebski
Derrick Stolee  writes:

> Most code paths load commits using lookup_commit() and then
> parse_commit(). In some cases, including some branch lookups, the commit
> is parsed using parse_object_buffer() which side-steps parse_commit() in
> favor of parse_commit_buffer().
>
> With generation numbers in the commit-graph, we need to ensure that any
> commit that exists in the commit-graph file has its generation number
> loaded.

All right, that is a nice explanation of the why behind this change.

>
> Create new load_commit_graph_info() method to fill in the information
> for a commit that exists only in the commit-graph file. Call it from
> parse_commit_buffer() after loading the other commit information from
> the given buffer. Only fill this information when specified by the
> 'check_graph' parameter. This avoids duplicate work when we already
> checked the graph in parse_commit_gently() or when simply checking the
> buffer contents in check_commit().

Couldn't this 'check_graph' parameter be a global variable similar to
the 'commit_graph' variable?  Maybe I am not understanding it.

>
> Signed-off-by: Derrick Stolee 
> ---
>  commit-graph.c | 51 --
>  commit-graph.h |  8 
>  commit.c   |  7 +--
>  commit.h   |  2 +-
>  object.c   |  2 +-
>  sha1_file.c|  2 +-
>  6 files changed, 49 insertions(+), 23 deletions(-)
>
> diff --git a/commit-graph.c b/commit-graph.c
> index 688d5b1801..21e853c21a 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -245,13 +245,19 @@ static struct commit_list **insert_parent_or_die(struct 
> commit_graph *g,
>   return &commit_list_insert(c, pptr)->next;
>  }
>  
> +static void fill_commit_graph_info(struct commit *item, struct commit_graph 
> *g, uint32_t pos)
> +{
> + const unsigned char *commit_data = g->chunk_commit_data + 
> GRAPH_DATA_WIDTH * pos;
> + item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
> +}
> +
>  static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, 
> uint32_t pos)
>  {
>   uint32_t edge_value;
>   uint32_t *parent_data_ptr;
>   uint64_t date_low, date_high;
>   struct commit_list **pptr;
> - const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len 
> + 16) * pos;
> + const unsigned char *commit_data = g->chunk_commit_data + 
> GRAPH_DATA_WIDTH * pos;

I'm probably wrong, but isn't it an unrelated change?

>  
>   item->object.parsed = 1;
>   item->graph_pos = pos;
> @@ -292,31 +298,40 @@ static int fill_commit_in_graph(struct commit *item, 
> struct commit_graph *g, uin
>   return 1;
>  }
>  
> +static int find_commit_in_graph(struct commit *item, struct commit_graph *g, 
> uint32_t *pos)
> +{
> + if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
> + *pos = item->graph_pos;
> + return 1;
> + } else {
> + return bsearch_graph(commit_graph, &(item->object.oid), pos);
> + }
> +}

All right (after the fix).

> +
>  int parse_commit_in_graph(struct commit *item)
>  {
> + uint32_t pos;
> +
> + if (item->object.parsed)
> + return 0;
>   if (!core_commit_graph)
>   return 0;
> - if (item->object.parsed)
> - return 1;

Hmmm... previously the function returned 1 if item->object.parsed, now
it returns 0 for this situation.  I don't understand this change.

> -
>   prepare_commit_graph();
> - if (commit_graph) {
> - uint32_t pos;
> - int found;
> - if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
> - pos = item->graph_pos;
> - found = 1;
> - } else {
> - found = bsearch_graph(commit_graph, 
> &(item->object.oid), &pos);
> - }
> -
> - if (found)
> - return fill_commit_in_graph(item, commit_graph, pos);
> - }
> -
> + if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
> + return fill_commit_in_graph(item, commit_graph, pos);

Nice refactoring.

>   return 0;
>  }
>  
> +void load_commit_graph_info(struct commit *item)
> +{
> + uint32_t pos;
> + if (!core_commit_graph)
> + return;
> + prepare_commit_graph();
> + if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
> + fill_commit_graph_info(item, commit_graph, pos);
> +}

And the reason for the refactoring.

> +
>  static struct tree *load_tree_for_commit(struct commit_graph *g, struct 
> commit *c)
>  {
>   struct object_id oid;
> diff --git a/commit-graph.h b/commit-graph.h
> index 260a468e73..96cccb10f3 100644
> --- a/commit-graph.h
> +++ b/commit-graph.h
> @@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir);
>   */
>  int parse_commit_in_graph(struct commit *item);
>  
> +/*
> + * It is possible that we loaded commit contents from the commit 

Re: [PATCH v3 8/9] commit-graph: always load commit-graph information

2018-04-17 Thread Derrick Stolee

On 4/17/2018 1:00 PM, Derrick Stolee wrote:

Most code paths load commits using lookup_commit() and then
parse_commit(). In some cases, including some branch lookups, the commit
is parsed using parse_object_buffer() which side-steps parse_commit() in
favor of parse_commit_buffer().

With generation numbers in the commit-graph, we need to ensure that any
commit that exists in the commit-graph file has its generation number
loaded.

Create new load_commit_graph_info() method to fill in the information
for a commit that exists only in the commit-graph file. Call it from
parse_commit_buffer() after loading the other commit information from
the given buffer. Only fill this information when specified by the
'check_graph' parameter. This avoids duplicate work when we already
checked the graph in parse_commit_gently() or when simply checking the
buffer contents in check_commit().

Signed-off-by: Derrick Stolee 
---
  commit-graph.c | 51 --
  commit-graph.h |  8 
  commit.c   |  7 +--
  commit.h   |  2 +-
  object.c   |  2 +-
  sha1_file.c|  2 +-
  6 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 688d5b1801..21e853c21a 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -245,13 +245,19 @@ static struct commit_list **insert_parent_or_die(struct 
commit_graph *g,
return &commit_list_insert(c, pptr)->next;
  }
  
+static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)

+{
+   const unsigned char *commit_data = g->chunk_commit_data + 
GRAPH_DATA_WIDTH * pos;
+   item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
+}
+
  static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, 
uint32_t pos)
  {
uint32_t edge_value;
uint32_t *parent_data_ptr;
uint64_t date_low, date_high;
struct commit_list **pptr;
-   const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len 
+ 16) * pos;
+   const unsigned char *commit_data = g->chunk_commit_data + 
GRAPH_DATA_WIDTH * pos;
  
  	item->object.parsed = 1;

item->graph_pos = pos;
@@ -292,31 +298,40 @@ static int fill_commit_in_graph(struct commit *item, 
struct commit_graph *g, uin
return 1;
  }
  
+static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)

+{
+   if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
+   *pos = item->graph_pos;
+   return 1;
+   } else {
+   return bsearch_graph(commit_graph, &(item->object.oid), pos);


The reference to 'commit_graph' in the above line should be 'g'. Sorry!


+   }
+}
+
  int parse_commit_in_graph(struct commit *item)
  {
+   uint32_t pos;
+
+   if (item->object.parsed)
+   return 0;
if (!core_commit_graph)
return 0;
-   if (item->object.parsed)
-   return 1;
-
prepare_commit_graph();
-   if (commit_graph) {
-   uint32_t pos;
-   int found;
-   if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
-   pos = item->graph_pos;
-   found = 1;
-   } else {
-   found = bsearch_graph(commit_graph, &(item->object.oid), 
&pos);
-   }
-
-   if (found)
-   return fill_commit_in_graph(item, commit_graph, pos);
-   }
-
+   if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
+   return fill_commit_in_graph(item, commit_graph, pos);
return 0;
  }
  
+void load_commit_graph_info(struct commit *item)

+{
+   uint32_t pos;
+   if (!core_commit_graph)
+   return;
+   prepare_commit_graph();
+   if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
+   fill_commit_graph_info(item, commit_graph, pos);
+}
+
  static struct tree *load_tree_for_commit(struct commit_graph *g, struct 
commit *c)
  {
struct object_id oid;
diff --git a/commit-graph.h b/commit-graph.h
index 260a468e73..96cccb10f3 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir);
   */
  int parse_commit_in_graph(struct commit *item);
  
+/*

+ * It is possible that we loaded commit contents from the commit buffer,
+ * but we also want to ensure the commit-graph content is correctly
+ * checked and filled. Fill the graph_pos and generation members of
+ * the given commit.
+ */
+void load_commit_graph_info(struct commit *item);
+
  struct tree *get_commit_tree_in_graph(const struct commit *c);
  
  struct commit_graph {

diff --git a/commit.c b/commit.c
index a70f120878..9ef6f699bd 100644
--- a/commit.c
+++ b/commit.c
@@ -331,7 +331,7 @@ const void *detach_commit_buffer(struct commit *commit, 
unsigned long *sizep)
return ret;
  }
  
-int 

[PATCH v3 8/9] commit-graph: always load commit-graph information

2018-04-17 Thread Derrick Stolee
Most code paths load commits using lookup_commit() and then
parse_commit(). In some cases, including some branch lookups, the commit
is parsed using parse_object_buffer() which side-steps parse_commit() in
favor of parse_commit_buffer().

With generation numbers in the commit-graph, we need to ensure that any
commit that exists in the commit-graph file has its generation number
loaded.

Create new load_commit_graph_info() method to fill in the information
for a commit that exists only in the commit-graph file. Call it from
parse_commit_buffer() after loading the other commit information from
the given buffer. Only fill this information when specified by the
'check_graph' parameter. This avoids duplicate work when we already
checked the graph in parse_commit_gently() or when simply checking the
buffer contents in check_commit().

Signed-off-by: Derrick Stolee 
---
 commit-graph.c | 51 --
 commit-graph.h |  8 
 commit.c   |  7 +--
 commit.h   |  2 +-
 object.c   |  2 +-
 sha1_file.c|  2 +-
 6 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index 688d5b1801..21e853c21a 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -245,13 +245,19 @@ static struct commit_list **insert_parent_or_die(struct 
commit_graph *g,
return &commit_list_insert(c, pptr)->next;
 }
 
+static void fill_commit_graph_info(struct commit *item, struct commit_graph 
*g, uint32_t pos)
+{
+   const unsigned char *commit_data = g->chunk_commit_data + 
GRAPH_DATA_WIDTH * pos;
+   item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
+}
+
 static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, 
uint32_t pos)
 {
uint32_t edge_value;
uint32_t *parent_data_ptr;
uint64_t date_low, date_high;
struct commit_list **pptr;
-   const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len 
+ 16) * pos;
+   const unsigned char *commit_data = g->chunk_commit_data + 
GRAPH_DATA_WIDTH * pos;
 
item->object.parsed = 1;
item->graph_pos = pos;
@@ -292,31 +298,40 @@ static int fill_commit_in_graph(struct commit *item, 
struct commit_graph *g, uin
return 1;
 }
 
+static int find_commit_in_graph(struct commit *item, struct commit_graph *g, 
uint32_t *pos)
+{
+   if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
+   *pos = item->graph_pos;
+   return 1;
+   } else {
+   return bsearch_graph(commit_graph, &(item->object.oid), pos);
+   }
+}
+
 int parse_commit_in_graph(struct commit *item)
 {
+   uint32_t pos;
+
+   if (item->object.parsed)
+   return 0;
if (!core_commit_graph)
return 0;
-   if (item->object.parsed)
-   return 1;
-
prepare_commit_graph();
-   if (commit_graph) {
-   uint32_t pos;
-   int found;
-   if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
-   pos = item->graph_pos;
-   found = 1;
-   } else {
-   found = bsearch_graph(commit_graph, 
&(item->object.oid), &pos);
-   }
-
-   if (found)
-   return fill_commit_in_graph(item, commit_graph, pos);
-   }
-
+   if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
+   return fill_commit_in_graph(item, commit_graph, pos);
return 0;
 }
 
+void load_commit_graph_info(struct commit *item)
+{
+   uint32_t pos;
+   if (!core_commit_graph)
+   return;
+   prepare_commit_graph();
+   if (commit_graph && find_commit_in_graph(item, commit_graph, &pos))
+   fill_commit_graph_info(item, commit_graph, pos);
+}
+
 static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit 
*c)
 {
struct object_id oid;
diff --git a/commit-graph.h b/commit-graph.h
index 260a468e73..96cccb10f3 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir);
  */
 int parse_commit_in_graph(struct commit *item);
 
+/*
+ * It is possible that we loaded commit contents from the commit buffer,
+ * but we also want to ensure the commit-graph content is correctly
+ * checked and filled. Fill the graph_pos and generation members of
+ * the given commit.
+ */
+void load_commit_graph_info(struct commit *item);
+
 struct tree *get_commit_tree_in_graph(const struct commit *c);
 
 struct commit_graph {
diff --git a/commit.c b/commit.c
index a70f120878..9ef6f699bd 100644
--- a/commit.c
+++ b/commit.c
@@ -331,7 +331,7 @@ const void *detach_commit_buffer(struct commit *commit, 
unsigned long *sizep)
return ret;
 }
 
-int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long 
size)
+int parse_commit_buffer(struct commit *item, const void *buffer,