Re: [PATCH/RFC/GSoC 06/17] rebase-am: introduce am backend for builtin rebase

2016-03-19 Thread Johannes Schindelin
Hi Paul,

On Sat, 12 Mar 2016, Paul Tan wrote:

> diff --git a/builtin/rebase.c b/builtin/rebase.c
> index 40176ca..ec63d3b 100644
> --- a/builtin/rebase.c
> +++ b/builtin/rebase.c
> @@ -8,6 +8,22 @@
>  #include "remote.h"
>  #include "branch.h"
>  #include "refs.h"
> +#include "rebase-am.h"
> +
> +enum rebase_type {
> + REBASE_TYPE_NONE = 0,
> + REBASE_TYPE_AM
> +};
> +
> +static const char *rebase_dir(enum rebase_type type)
> +{
> + switch (type) {
> + case REBASE_TYPE_AM:
> + return git_path_rebase_am_dir();
> + default:
> + die("BUG: invalid rebase_type %d", type);
> + }
> +}

This is awfully close to what the sequencer sports. So close that I would
call it technical debt.

It would most likely result in easier-to-maintain source code if the
sequencer and the rebase code shared as much as possible (in
sequencer.[ch], for historical reasons).


> diff --git a/rebase-am.c b/rebase-am.c
> new file mode 100644
> index 000..53e8798
> --- /dev/null
> +++ b/rebase-am.c
> @@ -0,0 +1,110 @@
> +#include "cache.h"
> +#include "rebase-am.h"
> +#include "run-command.h"
> +
> +GIT_PATH_FUNC(git_path_rebase_am_dir, "rebase-apply");
> +
> +void rebase_am_init(struct rebase_am *state, const char *dir)
> +{
> + if (!dir)
> + dir = git_path_rebase_am_dir();
> + rebase_options_init(>opts);
> + state->dir = xstrdup(dir);
> +}

Does it really make sense to have completely separate structs for the
different rebase types? I think not. It would not hurt IMO to have a
couple of fields that are only used for certain rebase types but not
others. The benefit of being able to reuse, code would far outweigh that
minimal cost.

It all comes back to my favorite paradigm: DRY. Don't Repeat Yourself.

Another important paradigm is: avoid feautures that you do not use. In
this instance, I have to ask why the init function accepts the "dir"
parameter? Do we ever need it? And if yes, would it make more sense to
introduce the parameter with the patch that actually uses it?

> +
> +void rebase_am_release(struct rebase_am *state)
> +{
> + rebase_options_release(>opts);
> + free(state->dir);

Urgh. The only reason for this free() and the corresponding xstrdup() is
so that the caller *may* release the directory before finishing the rebase
*if* it overrides the directory. That's not very elegant.

Why not simply state (by declaring the field as const char *) that it is
*not* the rebase machinery's duty to take care of the memory management of
this string?

This would simplify the common code flow, especially if it was done to
*all* strings in the state struct.

> +int rebase_am_in_progress(const struct rebase_am *state)
> +{
> + const char *dir = state ? state->dir : git_path_rebase_am_dir();
> + struct stat st;
> +
> + return !lstat(dir, ) && S_ISDIR(st.st_mode);
> +}

This function is sobbing inconsolably for being stuck into the rebase-am
part of the code, with a name that ensures that it will never grow up and
become more useful. Between its miserable life, it dreams of being named
dir_exists() and living the high life next to its buddy, file_exists().

> +int rebase_am_load(struct rebase_am *state)
> +{
> + if (rebase_options_load(>opts, state->dir) < 0)
> + return -1;
> +
> + return 0;
> +}

:-(

This looks like adding code for adding code's sake. Not only does it craft
its own return value instead of reusing rebase_options_load()'s, it is
just wrapping a single, simple statement, therefore its only use is to add
one layer of indirection.

> +static int run_format_patch(const char *patches, const struct object_id 
> *left,
> + const struct object_id *right)
> +{
> + struct child_process cp = CHILD_PROCESS_INIT;
> + int ret;
> +
> + cp.git_cmd = 1;
> + cp.out = xopen(patches, O_WRONLY | O_CREAT, 0777);
> + argv_array_push(, "format-patch");
> + argv_array_push(, "-k");
> + argv_array_push(, "--stdout");
> + argv_array_push(, "--full-index");
> + argv_array_push(, "--cherry-pick");
> + argv_array_push(, "--right-only");
> + argv_array_push(, "--src-prefix=a/");
> + argv_array_push(, "--dst-prefix=b/");
> + argv_array_push(, "--no-renames");
> + argv_array_push(, "--no-cover-letter");
> + argv_array_pushf(, "%s...%s", oid_to_hex(left), 
> oid_to_hex(right));
> +
> + ret = run_command();
> + close(cp.out);
> + return ret;
> +}
> +
> +static int run_am(const struct rebase_am *state, const char *patches)
> +{
> + struct child_process cp = CHILD_PROCESS_INIT;
> + int ret;
> +
> + cp.git_cmd = 1;
> + cp.in = xopen(patches, O_RDONLY);
> + argv_array_push(, "am");
> + argv_array_push(, "--rebasing");
> + if (state->opts.resolvemsg)
> + argv_array_pushf(, "--resolvemsg=%s", 
> state->opts.resolvemsg);
> +
> + ret = run_command();
> + close(cp.in);
> + return ret;
> +}

Yeah, these functions really want 

[PATCH/RFC/GSoC 06/17] rebase-am: introduce am backend for builtin rebase

2016-03-12 Thread Paul Tan
Since 7f59dbb (Rewrite rebase to use git-format-patch piped to git-am.,
2005-11-14), git-rebase will by default use "git am" to rebase commits.
This is done by first checking out to the new base commit, generating a
series of patches with the commits to replay, and then applying them
with git-am. Finally, if orig_head is a branch, it is updated to point
to the tip of the new rebased commit history.

Implement a skeletal version of this method of rebasing commits by
introducing a new rebase-am backend for our builtin-rebase. This
skeletal version can only call git-format-patch and git-am to perform a
rebase, and is unable to resume from a failed rebase. Subsequent
patches will re-implement all the missing features.

The symmetric difference between upstream...orig_head is used because in
a later patch, we will add an additional exclusion revision in order to
handle fork points correctly.  See b6266dc (rebase--am: use
--cherry-pick instead of --ignore-if-in-upstream, 2014-07-15).

The initial steps of checking out the new base commit, and the final
cleanup steps of updating refs are common between the am backend and
merge backend. As such, we implement the common setup and teardown
sequence in the shared functions rebase_common_setup() and
rebase_common_finish(), so we can share code with the merge backend when
it is implemented in a later patch.

Signed-off-by: Paul Tan 
---
 Makefile |   1 +
 builtin/rebase.c |  25 +
 rebase-am.c  | 110 +++
 rebase-am.h  |  22 +++
 rebase-common.c  |  81 
 rebase-common.h  |   6 +++
 6 files changed, 245 insertions(+)
 create mode 100644 rebase-am.c
 create mode 100644 rebase-am.h

diff --git a/Makefile b/Makefile
index b29c672..a2618ea 100644
--- a/Makefile
+++ b/Makefile
@@ -779,6 +779,7 @@ LIB_OBJS += prompt.o
 LIB_OBJS += quote.o
 LIB_OBJS += reachable.o
 LIB_OBJS += read-cache.o
+LIB_OBJS += rebase-am.o
 LIB_OBJS += rebase-common.o
 LIB_OBJS += reflog-walk.o
 LIB_OBJS += refs.o
diff --git a/builtin/rebase.c b/builtin/rebase.c
index 40176ca..ec63d3b 100644
--- a/builtin/rebase.c
+++ b/builtin/rebase.c
@@ -8,6 +8,22 @@
 #include "remote.h"
 #include "branch.h"
 #include "refs.h"
+#include "rebase-am.h"
+
+enum rebase_type {
+   REBASE_TYPE_NONE = 0,
+   REBASE_TYPE_AM
+};
+
+static const char *rebase_dir(enum rebase_type type)
+{
+   switch (type) {
+   case REBASE_TYPE_AM:
+   return git_path_rebase_am_dir();
+   default:
+   die("BUG: invalid rebase_type %d", type);
+   }
+}
 
 /**
  * Used by get_curr_branch_upstream_name() as a for_each_remote() callback to
@@ -208,6 +224,15 @@ int cmd_rebase(int argc, const char **argv, const char 
*prefix)
die(_("Failed to resolve '%s' as a valid revision."), 
"HEAD");
}
 
+   /* Run the appropriate rebase backend */
+   {
+   struct rebase_am state;
+   rebase_am_init(, rebase_dir(REBASE_TYPE_AM));
+   rebase_options_swap(, _opts);
+   rebase_am_run();
+   rebase_am_release();
+   }
+
rebase_options_release(_opts);
return 0;
 }
diff --git a/rebase-am.c b/rebase-am.c
new file mode 100644
index 000..53e8798
--- /dev/null
+++ b/rebase-am.c
@@ -0,0 +1,110 @@
+#include "cache.h"
+#include "rebase-am.h"
+#include "run-command.h"
+
+GIT_PATH_FUNC(git_path_rebase_am_dir, "rebase-apply");
+
+void rebase_am_init(struct rebase_am *state, const char *dir)
+{
+   if (!dir)
+   dir = git_path_rebase_am_dir();
+   rebase_options_init(>opts);
+   state->dir = xstrdup(dir);
+}
+
+void rebase_am_release(struct rebase_am *state)
+{
+   rebase_options_release(>opts);
+   free(state->dir);
+}
+
+int rebase_am_in_progress(const struct rebase_am *state)
+{
+   const char *dir = state ? state->dir : git_path_rebase_am_dir();
+   struct stat st;
+
+   return !lstat(dir, ) && S_ISDIR(st.st_mode);
+}
+
+int rebase_am_load(struct rebase_am *state)
+{
+   if (rebase_options_load(>opts, state->dir) < 0)
+   return -1;
+
+   return 0;
+}
+
+static int run_format_patch(const char *patches, const struct object_id *left,
+   const struct object_id *right)
+{
+   struct child_process cp = CHILD_PROCESS_INIT;
+   int ret;
+
+   cp.git_cmd = 1;
+   cp.out = xopen(patches, O_WRONLY | O_CREAT, 0777);
+   argv_array_push(, "format-patch");
+   argv_array_push(, "-k");
+   argv_array_push(, "--stdout");
+   argv_array_push(, "--full-index");
+   argv_array_push(, "--cherry-pick");
+   argv_array_push(, "--right-only");
+   argv_array_push(, "--src-prefix=a/");
+   argv_array_push(, "--dst-prefix=b/");
+   argv_array_push(, "--no-renames");
+   argv_array_push(, "--no-cover-letter");
+   argv_array_pushf(, "%s...%s",