Re: [PATCH v4 06/10] ref-filter: use generation number for --contains

2018-04-30 Thread Jakub Narebski
Derrick Stolee  writes:

> A commit A can reach a commit B only if the generation number of A
> is strictly larger than the generation number of B. This condition
> allows significantly short-circuiting commit-graph walks.
>
> Use generation number for '--contains' type queries.
>
> On a copy of the Linux repository where HEAD is containd in v4.13

Minor typo: containd -> contained.

> but no earlier tag, the command 'git tag --contains HEAD' had the
> following performance improvement:
>
> Before: 0.81s
> After:  0.04s
> Rel %:  -95%

Very nice.  I guess that any performance changes when the commit-graph
feature is not available are negligible / not measurable.

Rel % = (before - after)/before * 100%, isn't it?

Good.

>
> Helped-by: Jeff King 
> Signed-off-by: Derrick Stolee 
> ---
>  ref-filter.c | 24 
>  1 file changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/ref-filter.c b/ref-filter.c
> index aff24d93be..fb35067fc9 100644
> --- a/ref-filter.c
> +++ b/ref-filter.c
> @@ -16,6 +16,7 @@
>  #include "trailer.h"
>  #include "wt-status.h"
>  #include "commit-slab.h"
> +#include "commit-graph.h"
>  
>  static struct ref_msg {
>   const char *gone;
> @@ -1587,7 +1588,8 @@ static int in_commit_list(const struct commit_list 
> *want, struct commit *c)
>   */
>  static enum contains_result contains_test(struct commit *candidate,
> const struct commit_list *want,
> -   struct contains_cache *cache)
> +   struct contains_cache *cache,
> +   uint32_t cutoff)
>  {
>   enum contains_result *cached = contains_cache_at(cache, candidate);
>  
> @@ -1603,6 +1605,10 @@ static enum contains_result contains_test(struct 
> commit *candidate,
>  
>   /* Otherwise, we don't know; prepare to recurse */
>   parse_commit_or_die(candidate);
> +
> + if (candidate->generation < cutoff)
> + return CONTAINS_NO;
> +

Here we use the weaker negative-cut criterion, which has the advantage of
automatically handling the special values: _INFINITY, _MAX, _ZERO.

Stronger version:

  if A != B and A ---> B, then gen(A) > gen(B)

  if gen(A) <= gen(B) and A != B, then A -/-> B

Weaker version:

  if gen(A) < gen(B), then A -/-> B

If the commit-graph feature is not available, then all generation numbers
would be _INFINITY, and cutoff would also be _INFINITY - which means
this operation is practically a no-op.  One memory access (probably from
cache) and one comparison is very cheap.

All right.

>   return CONTAINS_UNKNOWN;
>  }
>  
> @@ -1618,8 +1624,18 @@ static enum contains_result contains_tag_algo(struct 
> commit *candidate,
> struct contains_cache *cache)
>  {
>   struct contains_stack contains_stack = { 0, 0, NULL };
> - enum contains_result result = contains_test(candidate, want, cache);
> + enum contains_result result;
> + uint32_t cutoff = GENERATION_NUMBER_INFINITY;
> + const struct commit_list *p;
> +
> + for (p = want; p; p = p->next) {
> + struct commit *c = p->item;
> + load_commit_graph_info(c);
> + if (c->generation < cutoff)
> + cutoff = c->generation;
> + }

For each commit in want, load its generation number if needed and find the
lowest one.  Any commit with a lower generation number cannot reach any of
the wants.  All right.

If the commit-graph feature is not available, this is practically a no-op.  It
is fast, as it only accesses memory - it does not access disk, nor does it
need to do any decompression, un-deltafication or parsing.

All right.

>  
> + result = contains_test(candidate, want, cache, cutoff);
>   if (result != CONTAINS_UNKNOWN)
>   return result;
>  
> @@ -1637,7 +1653,7 @@ static enum contains_result contains_tag_algo(struct 
> commit *candidate,
>* If we just popped the stack, parents->item has been marked,
>* therefore contains_test will return a meaningful yes/no.
>*/
> - else switch (contains_test(parents->item, want, cache)) {
> + else switch (contains_test(parents->item, want, cache, cutoff)) 
> {
>   case CONTAINS_YES:
>   *contains_cache_at(cache, commit) = CONTAINS_YES;
>   contains_stack.nr--;
> @@ -1651,7 +1667,7 @@ static enum contains_result contains_tag_algo(struct 
> commit *candidate,
>   }
>   }
>   free(contains_stack.contains_stack);
> - return contains_test(candidate, want, cache);
> + return contains_test(candidate, want, cache, cutoff);

Those two just update the call sites to the new signature.  All right.

>  }
>  
>  static int commit_contains(struct ref_filter *filter, struct commit *commit,


[PATCH v4 06/10] ref-filter: use generation number for --contains

2018-04-25 Thread Derrick Stolee
A commit A can reach a commit B only if the generation number of A
is strictly larger than the generation number of B. This condition
allows significantly short-circuiting commit-graph walks.

Use generation number for '--contains' type queries.

On a copy of the Linux repository where HEAD is containd in v4.13
but no earlier tag, the command 'git tag --contains HEAD' had the
following performance improvement:

Before: 0.81s
After:  0.04s
Rel %:  -95%

Helped-by: Jeff King 
Signed-off-by: Derrick Stolee 
---
 ref-filter.c | 24 
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index aff24d93be..fb35067fc9 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -16,6 +16,7 @@
 #include "trailer.h"
 #include "wt-status.h"
 #include "commit-slab.h"
+#include "commit-graph.h"
 
 static struct ref_msg {
const char *gone;
@@ -1587,7 +1588,8 @@ static int in_commit_list(const struct commit_list *want, 
struct commit *c)
  */
 static enum contains_result contains_test(struct commit *candidate,
  const struct commit_list *want,
- struct contains_cache *cache)
+ struct contains_cache *cache,
+ uint32_t cutoff)
 {
enum contains_result *cached = contains_cache_at(cache, candidate);
 
@@ -1603,6 +1605,10 @@ static enum contains_result contains_test(struct commit 
*candidate,
 
/* Otherwise, we don't know; prepare to recurse */
parse_commit_or_die(candidate);
+
+   if (candidate->generation < cutoff)
+   return CONTAINS_NO;
+
return CONTAINS_UNKNOWN;
 }
 
@@ -1618,8 +1624,18 @@ static enum contains_result contains_tag_algo(struct 
commit *candidate,
  struct contains_cache *cache)
 {
struct contains_stack contains_stack = { 0, 0, NULL };
-   enum contains_result result = contains_test(candidate, want, cache);
+   enum contains_result result;
+   uint32_t cutoff = GENERATION_NUMBER_INFINITY;
+   const struct commit_list *p;
+
+   for (p = want; p; p = p->next) {
+   struct commit *c = p->item;
+   load_commit_graph_info(c);
+   if (c->generation < cutoff)
+   cutoff = c->generation;
+   }
 
+   result = contains_test(candidate, want, cache, cutoff);
if (result != CONTAINS_UNKNOWN)
return result;
 
@@ -1637,7 +1653,7 @@ static enum contains_result contains_tag_algo(struct 
commit *candidate,
 * If we just popped the stack, parents->item has been marked,
 * therefore contains_test will return a meaningful yes/no.
 */
-   else switch (contains_test(parents->item, want, cache)) {
+   else switch (contains_test(parents->item, want, cache, cutoff)) 
{
case CONTAINS_YES:
*contains_cache_at(cache, commit) = CONTAINS_YES;
contains_stack.nr--;
@@ -1651,7 +1667,7 @@ static enum contains_result contains_tag_algo(struct 
commit *candidate,
}
}
free(contains_stack.contains_stack);
-   return contains_test(candidate, want, cache);
+   return contains_test(candidate, want, cache, cutoff);
 }
 
 static int commit_contains(struct ref_filter *filter, struct commit *commit,
-- 
2.17.0.39.g685157f7fb