Re: [PATCH v2 05/38] refs: create a base class "ref_store" for files_ref_store

2016-09-05 Thread David Turner
On Mon, 2016-09-05 at 05:53 +0200, Michael Haggerty wrote:
> On 09/04/2016 10:40 PM, David Turner wrote:
> > On Sun, 2016-09-04 at 18:08 +0200, Michael Haggerty wrote:
> > 
> >> +/* A linked list of ref_stores for submodules: */
> >> +static struct ref_store *submodule_ref_stores;
> > 
> > I don't like the per-submodule stores being in a linked list, which
> > requires a linear search.  Stefan has, I think, been doing a bunch of
> > work to scale git to support on the order of thousands of submodules,
> > which this potentially breaks.  What about a hashmap?
> 
> I agree it's not ideal, but this linked list is not new. Before this
> patch the same code was in `files-backend.c`, and before patch [03/38]
> essentially the same linked list was stored as `submodule_ref_caches`.
> So this is not a regression, and I'd rather not address it in this patch
> series.

Fair enough!



Re: [PATCH v2 05/38] refs: create a base class "ref_store" for files_ref_store

2016-09-04 Thread Michael Haggerty
On 09/04/2016 10:40 PM, David Turner wrote:
> On Sun, 2016-09-04 at 18:08 +0200, Michael Haggerty wrote:
> 
>> +/* A linked list of ref_stores for submodules: */
>> +static struct ref_store *submodule_ref_stores;
> 
> I don't like the per-submodule stores being in a linked list, which
> requires a linear search.  Stefan has, I think, been doing a bunch of
> work to scale git to support on the order of thousands of submodules,
> which this potentially breaks.  What about a hashmap?

I agree it's not ideal, but this linked list is not new. Before this
patch the same code was in `files-backend.c`, and before patch [03/38]
essentially the same linked list was stored as `submodule_ref_caches`.
So this is not a regression, and I'd rather not address it in this patch
series.

CC Stefan in case he'd like to put it on his radar. Honestly, I've never
checked how often the submodule reference cache is used in real life.

Michael



Re: [PATCH v2 05/38] refs: create a base class "ref_store" for files_ref_store

2016-09-04 Thread David Turner
On Sun, 2016-09-04 at 18:08 +0200, Michael Haggerty wrote:

> +/* A linked list of ref_stores for submodules: */
> +static struct ref_store *submodule_ref_stores;

I don't like the per-submodule stores being in a linked list, which
requires a linear search.  Stefan has, I think, been doing a bunch of
work to scale git to support on the order of thousands of submodules,
which this potentially breaks.  What about a hashmap?




[PATCH v2 05/38] refs: create a base class "ref_store" for files_ref_store

2016-09-04 Thread Michael Haggerty
We want ref_stores to be polymorphic, so invent a base class of which
files_ref_store is a derived class. For now there is exactly one
ref_store for the main repository and one for any submodules whose
references have been accessed.

Signed-off-by: Michael Haggerty 
Signed-off-by: Junio C Hamano 
---
 refs.c   |  93 +++
 refs/files-backend.c | 177 ---
 refs/refs-internal.h |  78 +++
 3 files changed, 270 insertions(+), 78 deletions(-)

diff --git a/refs.c b/refs.c
index d2a29bb..9d6bcb1 100644
--- a/refs.c
+++ b/refs.c
@@ -1151,8 +1151,12 @@ int head_ref(each_ref_fn fn, void *cb_data)
 static int do_for_each_ref(const char *submodule, const char *prefix,
   each_ref_fn fn, int trim, int flags, void *cb_data)
 {
+   struct ref_store *refs = get_ref_store(submodule);
struct ref_iterator *iter;
 
+   if (!refs)
+   return 0;
+
iter = files_ref_iterator_begin(submodule, prefix, flags);
iter = prefix_ref_iterator_begin(iter, prefix, trim);
 
@@ -1284,3 +1288,92 @@ const char *resolve_ref_unsafe(const char *refname, int 
resolve_flags,
errno = ELOOP;
return NULL;
 }
+
+/* A pointer to the ref_store for the main repository: */
+static struct ref_store *main_ref_store;
+
+/* A linked list of ref_stores for submodules: */
+static struct ref_store *submodule_ref_stores;
+
+void base_ref_store_init(struct ref_store *refs,
+const struct ref_storage_be *be,
+const char *submodule)
+{
+   refs->be = be;
+   if (!submodule) {
+   if (main_ref_store)
+   die("BUG: main_ref_store initialized twice");
+
+   refs->submodule = "";
+   refs->next = NULL;
+   main_ref_store = refs;
+   } else {
+   if (lookup_ref_store(submodule))
+   die("BUG: ref_store for submodule '%s' initialized 
twice",
+   submodule);
+
+   refs->submodule = xstrdup(submodule);
+   refs->next = submodule_ref_stores;
+   submodule_ref_stores = refs;
+   }
+}
+
+struct ref_store *ref_store_init(const char *submodule)
+{
+   const char *be_name = "files";
+   struct ref_storage_be *be = find_ref_storage_backend(be_name);
+
+   if (!be)
+   die("BUG: reference backend %s is unknown", be_name);
+
+   if (!submodule || !*submodule)
+   return be->init(NULL);
+   else
+   return be->init(submodule);
+}
+
+struct ref_store *lookup_ref_store(const char *submodule)
+{
+   struct ref_store *refs;
+
+   if (!submodule || !*submodule)
+   return main_ref_store;
+
+   for (refs = submodule_ref_stores; refs; refs = refs->next) {
+   if (!strcmp(submodule, refs->submodule))
+   return refs;
+   }
+
+   return NULL;
+}
+
+struct ref_store *get_ref_store(const char *submodule)
+{
+   struct ref_store *refs;
+
+   if (!submodule || !*submodule) {
+   refs = lookup_ref_store(NULL);
+
+   if (!refs)
+   refs = ref_store_init(NULL);
+   } else {
+   refs = lookup_ref_store(submodule);
+
+   if (!refs) {
+   struct strbuf submodule_sb = STRBUF_INIT;
+
+   strbuf_addstr(_sb, submodule);
+   if (is_nonbare_repository_dir(_sb))
+   refs = ref_store_init(submodule);
+   strbuf_release(_sb);
+   }
+   }
+
+   return refs;
+}
+
+void assert_main_repository(struct ref_store *refs, const char *caller)
+{
+   if (*refs->submodule)
+   die("BUG: %s called for a submodule", caller);
+}
diff --git a/refs/files-backend.c b/refs/files-backend.c
index ecf66e6..439c500 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -910,17 +910,11 @@ struct packed_ref_cache {
  * Future: need to be in "struct repository"
  * when doing a full libification.
  */
-static struct files_ref_store {
-   struct files_ref_store *next;
+struct files_ref_store {
+   struct ref_store base;
struct ref_entry *loose;
struct packed_ref_cache *packed;
-   /*
-* The submodule name, or "" for the main repo. We allocate
-* length 1 rather than FLEX_ARRAY so that the main
-* files_ref_store is initialized correctly.
-*/
-   char name[1];
-} ref_store, *submodule_ref_stores;
+};
 
 /* Lock used for the main packed-refs file: */
 static struct lock_file packlock;
@@ -973,53 +967,50 @@ static void clear_loose_ref_cache(struct files_ref_store 
*refs)
  * Create a new submodule ref cache and add it to the internal
  * set of caches.
  */
-static struct files_ref_store