Re: [PATCH 18/23] midx: use midx in abbreviation calculations

2018-06-22 Thread Derrick Stolee

On 6/9/2018 2:01 PM, Duy Nguyen wrote:

On Thu, Jun 7, 2018 at 4:06 PM Derrick Stolee  wrote:

@@ -565,8 +632,11 @@ static void find_abbrev_len_for_pack(struct packed_git *p,

  static void find_abbrev_len_packed(struct min_abbrev_data *mad)
  {
+   struct midxed_git *m;
 struct packed_git *p;

+   for (m = get_midxed_git(the_repository); m; m = m->next)
+   find_abbrev_len_for_midx(m, mad);

If all the packs are in midx, we don't need to run the second loop
below, do we? Otherwise I don't see why we waste cycles on finding
abbrev length on midx at all.


We put all packs _at time of writing_ into the midx. More packs may be 
added later that are not in the midx. There are tests in 
t5319-multi-pack-index.sh that verify everything works in this "mixed mode".


It is important that the packfiles are not loaded into the packed_git 
list if they are managed by the midx.





 for (p = get_packed_git(the_repository); p; p = p->next)
 find_abbrev_len_for_pack(p, mad);
  }




Re: [PATCH 18/23] midx: use midx in abbreviation calculations

2018-06-09 Thread Duy Nguyen
On Thu, Jun 7, 2018 at 4:06 PM Derrick Stolee  wrote:
> @@ -565,8 +632,11 @@ static void find_abbrev_len_for_pack(struct packed_git 
> *p,
>
>  static void find_abbrev_len_packed(struct min_abbrev_data *mad)
>  {
> +   struct midxed_git *m;
> struct packed_git *p;
>
> +   for (m = get_midxed_git(the_repository); m; m = m->next)
> +   find_abbrev_len_for_midx(m, mad);

If all the packs are in midx, we don't need to run the second loop
below, do we? Otherwise I don't see why we waste cycles on finding
abbrev length on midx at all.

> for (p = get_packed_git(the_repository); p; p = p->next)
> find_abbrev_len_for_pack(p, mad);
>  }
-- 
Duy


[PATCH 18/23] midx: use midx in abbreviation calculations

2018-06-07 Thread Derrick Stolee
Signed-off-by: Derrick Stolee 
---
 midx.c  | 11 
 midx.h  |  3 +++
 packfile.c  |  6 +
 packfile.h  |  1 +
 sha1-name.c | 70 +
 t/t5319-midx.sh |  3 ++-
 6 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/midx.c b/midx.c
index 6eca8f1b12..25d8142c2a 100644
--- a/midx.c
+++ b/midx.c
@@ -203,6 +203,17 @@ int bsearch_midx(const struct object_id *oid, struct 
midxed_git *m, uint32_t *re
MIDX_HASH_LEN, result);
 }
 
+struct object_id *nth_midxed_object_oid(struct object_id *oid,
+   struct midxed_git *m,
+   uint32_t n)
+{
+   if (n >= m->num_objects)
+   return NULL;
+
+   hashcpy(oid->hash, m->chunk_oid_lookup + m->hash_len * n);
+   return oid;
+}
+
 static off_t nth_midxed_offset(struct midxed_git *m, uint32_t pos)
 {
const unsigned char *offset_data;
diff --git a/midx.h b/midx.h
index 0c66812229..497bdcc77c 100644
--- a/midx.h
+++ b/midx.h
@@ -9,6 +9,9 @@
 
 struct midxed_git *load_midxed_git(const char *object_dir);
 int bsearch_midx(const struct object_id *oid, struct midxed_git *m, uint32_t 
*result);
+struct object_id *nth_midxed_object_oid(struct object_id *oid,
+   struct midxed_git *m,
+   uint32_t n);
 int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct 
midxed_git *m);
 int prepare_midxed_git_one(struct repository *r, const char *object_dir);
 
diff --git a/packfile.c b/packfile.c
index 73f8cc28ee..638e113972 100644
--- a/packfile.c
+++ b/packfile.c
@@ -919,6 +919,12 @@ struct packed_git *get_packed_git(struct repository *r)
return r->objects->packed_git;
 }
 
+struct midxed_git *get_midxed_git(struct repository *r)
+{
+   prepare_packed_git(r);
+   return r->objects->midxed_git;
+}
+
 struct list_head *get_packed_git_mru(struct repository *r)
 {
prepare_packed_git(r);
diff --git a/packfile.h b/packfile.h
index e0a38aba93..01e14b93fd 100644
--- a/packfile.h
+++ b/packfile.h
@@ -39,6 +39,7 @@ extern void install_packed_git(struct repository *r, struct 
packed_git *pack);
 
 struct packed_git *get_packed_git(struct repository *r);
 struct list_head *get_packed_git_mru(struct repository *r);
+struct midxed_git *get_midxed_git(struct repository *r);
 
 /*
  * Give a rough count of objects in the repository. This sacrifices accuracy
diff --git a/sha1-name.c b/sha1-name.c
index 60d9ef3c7e..d975a186c9 100644
--- a/sha1-name.c
+++ b/sha1-name.c
@@ -12,6 +12,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "repository.h"
+#include "midx.h"
 
 static int get_oid_oneline(const char *, struct object_id *, struct 
commit_list *);
 
@@ -149,6 +150,32 @@ static int match_sha(unsigned len, const unsigned char *a, 
const unsigned char *
return 1;
 }
 
+static void unique_in_midx(struct midxed_git *m,
+  struct disambiguate_state *ds)
+{
+   uint32_t num, i, first = 0;
+   const struct object_id *current = NULL;
+   num = m->num_objects;
+
+   if (!num)
+   return;
+
+   bsearch_midx(&ds->bin_pfx, m, &first);
+
+   /*
+* At this point, "first" is the location of the lowest object
+* with an object name that could match "bin_pfx".  See if we have
+* 0, 1 or more objects that actually match(es).
+*/
+   for (i = first; i < num && !ds->ambiguous; i++) {
+   struct object_id oid;
+   current = nth_midxed_object_oid(&oid, m, i);
+   if (!match_sha(ds->len, ds->bin_pfx.hash, current->hash))
+   break;
+   update_candidates(ds, current);
+   }
+}
+
 static void unique_in_pack(struct packed_git *p,
   struct disambiguate_state *ds)
 {
@@ -177,8 +204,12 @@ static void unique_in_pack(struct packed_git *p,
 
 static void find_short_packed_object(struct disambiguate_state *ds)
 {
+   struct midxed_git *m;
struct packed_git *p;
 
+   for (m = get_midxed_git(the_repository); m && !ds->ambiguous;
+m = m->next)
+   unique_in_midx(m, ds);
for (p = get_packed_git(the_repository); p && !ds->ambiguous;
 p = p->next)
unique_in_pack(p, ds);
@@ -527,6 +558,42 @@ static int extend_abbrev_len(const struct object_id *oid, 
void *cb_data)
return 0;
 }
 
+static void find_abbrev_len_for_midx(struct midxed_git *m,
+struct min_abbrev_data *mad)
+{
+   int match = 0;
+   uint32_t num, first = 0;
+   struct object_id oid;
+   const struct object_id *mad_oid;
+
+   if (!m->num_objects)
+   return;
+
+   num = m->num_objects;
+   mad_oid = mad->oid;
+   match = bsearch_midx(mad_oid, m, &first);
+
+   /*
+* first is now the position in