RE: [PATCH 3/6] tmp-objdir: introduce API for temporary object directories

2016-09-30 Thread David Turner
Thanks.  The rest all look good too.

> -Original Message-
> From: Jeff King [mailto:p...@peff.net]
> Sent: Friday, September 30, 2016 6:45 PM
> To: David Turner
> Cc: git@vger.kernel.org
> Subject: Re: [PATCH 3/6] tmp-objdir: introduce API for temporary object
> directories
> 
> On Fri, Sep 30, 2016 at 09:32:04PM +, David Turner wrote:
> 
> > > +static void env_append(struct argv_array *env, const char *key,
> > > +const char *val) {
> > > + const char *old = getenv(key);
> > > +
> > > + if (!old)
> > > + argv_array_pushf(env, "%s=%s", key, val);
> > > + else
> > > + argv_array_pushf(env, "%s=%s%c%s", key, old, PATH_SEP,
> > > val);
> > >+}
> >
> > I would like a comment explaining this function.
> 
> I'll squash in:
> 
> diff --git a/tmp-objdir.c b/tmp-objdir.c index c92e6cc..a98c246 100644
> --- a/tmp-objdir.c
> +++ b/tmp-objdir.c
> @@ -70,6 +70,13 @@ static void remove_tmp_objdir_on_signal(int signo)
>   raise(signo);
>  }
> 
> +/*
> + * These env_* functions are for setting up the child environment; the
> + * "replace" variant overrides the value of any existing variable with
> +that
> + * "key". The "append" variant puts our new value at the end of a list,
> + * separated by PATH_SEP (which is what separate values in
> + * GIT_ALTERNATE_OBJECT_DIRECTORIES).
> + */
>  static void env_append(struct argv_array *env, const char *key, const char
> *val)  {
>   const char *old = getenv(key);
> 
> > > + * Finalize a temporary object directory by migrating its objects
> > > +into the main
> > > + * object database.
> > > + */
> >
> > This should mention that it frees its argument.
> 
> And:
> 
> diff --git a/tmp-objdir.h b/tmp-objdir.h index aa47aa9..b1e45b4 100644
> --- a/tmp-objdir.h
> +++ b/tmp-objdir.h
> @@ -35,7 +35,8 @@ const char **tmp_objdir_env(const struct tmp_objdir
> *);
> 
>  /*
>   * Finalize a temporary object directory by migrating its objects into the 
> main
> - * object database.
> + * object database, removing the temporary directory, and freeing any
> + * associated resources.
>   */
>  int tmp_objdir_migrate(struct tmp_objdir *);
> 
> 
> -Peff


Re: [PATCH 3/6] tmp-objdir: introduce API for temporary object directories

2016-09-30 Thread Jeff King
On Fri, Sep 30, 2016 at 09:32:04PM +, David Turner wrote:

> > +static void env_append(struct argv_array *env, const char *key, const
> > +char *val) {
> > +   const char *old = getenv(key);
> > +
> > +   if (!old)
> > +   argv_array_pushf(env, "%s=%s", key, val);
> > +   else
> > +   argv_array_pushf(env, "%s=%s%c%s", key, old, PATH_SEP,
> > val); 
> >+}
> 
> I would like a comment explaining this function. 

I'll squash in:

diff --git a/tmp-objdir.c b/tmp-objdir.c
index c92e6cc..a98c246 100644
--- a/tmp-objdir.c
+++ b/tmp-objdir.c
@@ -70,6 +70,13 @@ static void remove_tmp_objdir_on_signal(int signo)
raise(signo);
 }
 
+/*
+ * These env_* functions are for setting up the child environment; the
+ * "replace" variant overrides the value of any existing variable with that
+ * "key". The "append" variant puts our new value at the end of a list,
+ * separated by PATH_SEP (which is what separate values in
+ * GIT_ALTERNATE_OBJECT_DIRECTORIES).
+ */
 static void env_append(struct argv_array *env, const char *key, const char 
*val)
 {
const char *old = getenv(key);

> > + * Finalize a temporary object directory by migrating its objects into
> > +the main
> > + * object database.
> > + */
> 
> This should mention that it frees its argument.

And:

diff --git a/tmp-objdir.h b/tmp-objdir.h
index aa47aa9..b1e45b4 100644
--- a/tmp-objdir.h
+++ b/tmp-objdir.h
@@ -35,7 +35,8 @@ const char **tmp_objdir_env(const struct tmp_objdir *);
 
 /*
  * Finalize a temporary object directory by migrating its objects into the main
- * object database.
+ * object database, removing the temporary directory, and freeing any
+ * associated resources.
  */
 int tmp_objdir_migrate(struct tmp_objdir *);
 

-Peff


Re: [PATCH 3/6] tmp-objdir: introduce API for temporary object directories

2016-09-30 Thread Jeff King
On Fri, Sep 30, 2016 at 02:25:43PM -0700, Junio C Hamano wrote:

> > +void add_to_alternates_internal(const char *reference)
> > +{
> > +   prepare_alt_odb();
> > +   link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
> > +}
> > +
> 
> A function _internal being extern felt a bit funny.  We are only
> appending so the first one does not have to be reprepare.

It's a match for add_to_alternates_file(). Suggestions for a better word
are welcome.

We do need to prepare_alt_odb(), as that is what sets up the
alt_odb_tail pointer. And also, a later prepare() call would overwrite
our entry.  We could refactor the alt_odb code, but it seemed simplest
to just make sure we don't add to an unprepared list.

> > +   t = xmalloc(sizeof(*t));
> > +   strbuf_init(>path, 0);
> > +   argv_array_init(>env);
> > +
> > +   strbuf_addf(>path, "%s/incoming-XX", get_object_directory());
> 
> I was wondering where you would put this in.  Inside .git/objects/
> sounds good.

The name "incoming" is kind of arbitrary and related to the fact that
this is used for receive-pack (though if we were to use it on the
fetching side, I think it would be equally correct). I don't think it
really matters in practice.

> > +static int pack_copy_priority(const char *name)
> > +{
> > +   if (!starts_with(name, "pack"))
> > +   return 0;
> > +   if (ends_with(name, ".keep"))
> > +   return 1;
> > +   if (ends_with(name, ".pack"))
> > +   return 2;
> > +   if (ends_with(name, ".idx"))
> > +   return 3;
> > +   return 4;
> > +}
> 
> Thanks for being careful.  A blind "cp -r" would have ruined the
> day.
> 
> We do not do bitmaps upon receiving, I guess.

But we don't, but they (and anything else) would just sort at the end,
which is OK.

> > + * struct tmp_objdir *t = tmp_objdir_create();
> > + * if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) &&
> > + * !tmp_objdir_migrate(t))
> > + * printf("success!\n");
> > + * else
> > + * die("failed...tmp_objdir will clean up for us");
> 
> Made me briefly wonder if a caller might want to use appropriate
> environment to use the tmp-objdir given by the API in addition to
> its own, but then such a caller just needs to prepare its own argv-array
> and concatenate tmp_objdir_env() before making the opt_cd_env call,
> so this is perfectly fine.

Yep, and that's exactly what happens in one spot of the next patch.
My original had just open-coded, but I was happy to see we have
argv_array_pushv() these days, so it's a one-liner.

In the very original version, the receive-pack process did not need to
access the new objects at all (not until ref update time anyway, at
which point they've been migrated). And that's why the environment is
intentionally kept separate, and the caller can feed it to whichever
sub-programs it chooses. But a later version of git that handled shallow
pushes required receive-pack to actually look at the objects, and I
added the add_to_alternates_internal() call you see here.

At that point, it does make me wonder if a better interface would be for
tmp_objdir to just set up the environment variables in the parent
process in the first place, and then restore them upon
tmp_objdir_destroy(). It makes things a bit more automatic, which makes
me hesitate, but I think it would be fine for receive-pack.

I dunno. I mostly left it alone because I did it this way long ago, and
it wasn't broke. Polishing for upstream is an opportunity to fix old
oddities, but I think there is some value in applying a more
battle-tested patch.

-Peff



RE: [PATCH 3/6] tmp-objdir: introduce API for temporary object directories

2016-09-30 Thread David Turner
> +static void env_append(struct argv_array *env, const char *key, const
> +char *val) {
> + const char *old = getenv(key);
> +
> + if (!old)
> + argv_array_pushf(env, "%s=%s", key, val);
> + else
> + argv_array_pushf(env, "%s=%s%c%s", key, old, PATH_SEP,
> val); 
>+}

I would like a comment explaining this function. 

> + * Finalize a temporary object directory by migrating its objects into
> +the main
> + * object database.
> + */

This should mention that it frees its argument.



Re: [PATCH 3/6] tmp-objdir: introduce API for temporary object directories

2016-09-30 Thread Junio C Hamano
Jeff King  writes:

> diff --git a/sha1_file.c b/sha1_file.c
> index 9a79c19..65deaf9 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -421,6 +421,12 @@ void add_to_alternates_file(const char *reference)
>   free(alts);
>  }
>  
> +void add_to_alternates_internal(const char *reference)
> +{
> + prepare_alt_odb();
> + link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
> +}
> +

A function _internal being extern felt a bit funny.  We are only
appending so the first one does not have to be reprepare.

> +static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
> +{
> + int err;
> +
> + if (!t)
> + return 0;
> +
> + if (t == the_tmp_objdir)
> + the_tmp_objdir = NULL;
> +
> + /*
> +  * This may use malloc via strbuf_grow(), but we should
> +  * have pre-grown t->path sufficiently so that this
> +  * doesn't happen in practice.
> +  */
> + err = remove_dir_recursively(>path, 0);
> +
> + /*
> +  * When we are cleaning up due to a signal, we won't bother
> +  * freeing memory; it may cause a deadlock if the signal
> +  * arrived while libc's allocator lock is held.
> +  */
> + if (!on_signal)
> + tmp_objdir_free(t);
> + return err;
> +}
> +
> +int tmp_objdir_destroy(struct tmp_objdir *t)
> +{
> + return tmp_objdir_destroy_1(t, 0);
> +}

Looks sensible.

> + t = xmalloc(sizeof(*t));
> + strbuf_init(>path, 0);
> + argv_array_init(>env);
> +
> + strbuf_addf(>path, "%s/incoming-XX", get_object_directory());

I was wondering where you would put this in.  Inside .git/objects/
sounds good.

> +/*
> + * Make sure we copy packfiles and their associated metafiles in the correct
> + * order. All of these ends_with checks are slightly expensive to do in
> + * the midst of a sorting routine, but in practice it shouldn't matter.
> + * We will have a relatively small number of packfiles to order, and loose
> + * objects exit early in the first line.
> + */
> +static int pack_copy_priority(const char *name)
> +{
> + if (!starts_with(name, "pack"))
> + return 0;
> + if (ends_with(name, ".keep"))
> + return 1;
> + if (ends_with(name, ".pack"))
> + return 2;
> + if (ends_with(name, ".idx"))
> + return 3;
> + return 4;
> +}

Thanks for being careful.  A blind "cp -r" would have ruined the
day.

We do not do bitmaps upon receiving, I guess.

> + *   struct tmp_objdir *t = tmp_objdir_create();
> + *   if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) &&
> + *   !tmp_objdir_migrate(t))
> + *   printf("success!\n");
> + *   else
> + *   die("failed...tmp_objdir will clean up for us");

Made me briefly wonder if a caller might want to use appropriate
environment to use the tmp-objdir given by the API in addition to
its own, but then such a caller just needs to prepare its own argv-array
and concatenate tmp_objdir_env() before making the opt_cd_env call,
so this is perfectly fine.



[PATCH 3/6] tmp-objdir: introduce API for temporary object directories

2016-09-30 Thread Jeff King
Once objects are added to the object database by a process,
they cannot easily be deleted, as we don't know what other
processes may have started referencing them. We have to
clean them up with git-gc, which will apply the usual
reachability and grace-period checks.

This patch provides an alternative: it helps callers create
a temporary directory inside the object directory, and a
temporary environment which can be passed to sub-programs to
ask them to write there (the original object directory
remains accessible as an alternate of the temporary one).

See tmp-objdir.h for details on the API.

Signed-off-by: Jeff King 
---
 Makefile |   1 +
 cache.h  |   1 +
 sha1_file.c  |   6 ++
 tmp-objdir.c | 266 +++
 tmp-objdir.h |  53 
 5 files changed, 327 insertions(+)
 create mode 100644 tmp-objdir.c
 create mode 100644 tmp-objdir.h

diff --git a/Makefile b/Makefile
index 1aad150..4e3becb 100644
--- a/Makefile
+++ b/Makefile
@@ -831,6 +831,7 @@ LIB_OBJS += submodule-config.o
 LIB_OBJS += symlinks.o
 LIB_OBJS += tag.o
 LIB_OBJS += tempfile.o
+LIB_OBJS += tmp-objdir.o
 LIB_OBJS += trace.o
 LIB_OBJS += trailer.o
 LIB_OBJS += transport.o
diff --git a/cache.h b/cache.h
index ed3d5df..607c9b5 100644
--- a/cache.h
+++ b/cache.h
@@ -1389,6 +1389,7 @@ extern void prepare_alt_odb(void);
 extern void read_info_alternates(const char * relative_base, int depth);
 extern char *compute_alternate_path(const char *path, struct strbuf *err);
 extern void add_to_alternates_file(const char *reference);
+extern void add_to_alternates_internal(const char *reference);
 typedef int alt_odb_fn(struct alternate_object_database *, void *);
 extern int foreach_alt_odb(alt_odb_fn, void*);
 
diff --git a/sha1_file.c b/sha1_file.c
index 9a79c19..65deaf9 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -421,6 +421,12 @@ void add_to_alternates_file(const char *reference)
free(alts);
 }
 
+void add_to_alternates_internal(const char *reference)
+{
+   prepare_alt_odb();
+   link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
+}
+
 /*
  * Compute the exact path an alternate is at and returns it. In case of
  * error NULL is returned and the human readable error is added to `err`
diff --git a/tmp-objdir.c b/tmp-objdir.c
new file mode 100644
index 000..c92e6cc
--- /dev/null
+++ b/tmp-objdir.c
@@ -0,0 +1,266 @@
+#include "cache.h"
+#include "tmp-objdir.h"
+#include "dir.h"
+#include "sigchain.h"
+#include "string-list.h"
+#include "strbuf.h"
+#include "argv-array.h"
+
+struct tmp_objdir {
+   struct strbuf path;
+   struct argv_array env;
+};
+
+/*
+ * Allow only one tmp_objdir at a time in a running process, which simplifies
+ * our signal/atexit cleanup routines.  It's doubtful callers will ever need
+ * more than one, and we can expand later if so.  You can have many such
+ * tmp_objdirs simultaneously in many processes, of course.
+ */
+struct tmp_objdir *the_tmp_objdir;
+
+static void tmp_objdir_free(struct tmp_objdir *t)
+{
+   strbuf_release(>path);
+   argv_array_clear(>env);
+   free(t);
+}
+
+static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
+{
+   int err;
+
+   if (!t)
+   return 0;
+
+   if (t == the_tmp_objdir)
+   the_tmp_objdir = NULL;
+
+   /*
+* This may use malloc via strbuf_grow(), but we should
+* have pre-grown t->path sufficiently so that this
+* doesn't happen in practice.
+*/
+   err = remove_dir_recursively(>path, 0);
+
+   /*
+* When we are cleaning up due to a signal, we won't bother
+* freeing memory; it may cause a deadlock if the signal
+* arrived while libc's allocator lock is held.
+*/
+   if (!on_signal)
+   tmp_objdir_free(t);
+   return err;
+}
+
+int tmp_objdir_destroy(struct tmp_objdir *t)
+{
+   return tmp_objdir_destroy_1(t, 0);
+}
+
+static void remove_tmp_objdir(void)
+{
+   tmp_objdir_destroy(the_tmp_objdir);
+}
+
+static void remove_tmp_objdir_on_signal(int signo)
+{
+   tmp_objdir_destroy_1(the_tmp_objdir, 1);
+   sigchain_pop(signo);
+   raise(signo);
+}
+
+static void env_append(struct argv_array *env, const char *key, const char 
*val)
+{
+   const char *old = getenv(key);
+
+   if (!old)
+   argv_array_pushf(env, "%s=%s", key, val);
+   else
+   argv_array_pushf(env, "%s=%s%c%s", key, old, PATH_SEP, val);
+}
+
+static void env_replace(struct argv_array *env, const char *key, const char 
*val)
+{
+   argv_array_pushf(env, "%s=%s", key, val);
+}
+
+static int setup_tmp_objdir(const char *root)
+{
+   char *path;
+   int ret = 0;
+
+   path = xstrfmt("%s/pack", root);
+   ret = mkdir(path, 0777);
+   free(path);
+
+   return ret;
+}
+
+struct tmp_objdir *tmp_objdir_create(void)
+{
+   static int installed_handlers;
+   struct