Re: [PATCH v5 2/4] ls-files: optionally recurse into submodules

2016-09-28 Thread Junio C Hamano
Brandon Williams  writes:

> +--recurse-submodules::
> + Recursively calls ls-files on each submodule in the repository.
> + Currently there is only support for the --cached mode.

Good to describe it, but at this step, there is only support for the
"--cached" mode and it does not take a pathspec.

Also as the series progresses, I would expect this to be updated in
patches $n/4 (2 < $n).

> + if (recurse_submodules && argc)
> + die("ls-files --recurse-submodules does not support path "
> + "arguments");

Hmm, s/path arguments/pathspec/, perhaps, as the latter is specified
in the glossary?



Re: [PATCH v5 2/4] ls-files: optionally recurse into submodules

2016-09-28 Thread Stefan Beller
On Wed, Sep 28, 2016 at 2:50 PM, Brandon Williams  wrote:
> Allow ls-files to recognize submodules in order to retrieve a list of
> files from a repository's submodules.  This is done by forking off a
> process to recursively call ls-files on all submodules. Use top-level
> --super-prefix option to pass a path to the submodule which it can
> use to prepend to output or pathspec matching logic.
>
> Signed-off-by: Brandon Williams 
> ---
>  Documentation/git-ls-files.txt |   7 +-
>  builtin/ls-files.c | 139 
> -
>  git.c  |   2 +-
>  t/t3007-ls-files-recurse-submodules.sh | 100 
>  4 files changed, 208 insertions(+), 40 deletions(-)
>  create mode 100755 t/t3007-ls-files-recurse-submodules.sh
>
> diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
> index 0d933ac..446209e 100644
> --- a/Documentation/git-ls-files.txt
> +++ b/Documentation/git-ls-files.txt
> @@ -18,7 +18,8 @@ SYNOPSIS
> [--exclude-per-directory=]
> [--exclude-standard]
> [--error-unmatch] [--with-tree=]
> -   [--full-name] [--abbrev] [--] [...]
> +   [--full-name] [--recurse-submodules]
> +   [--abbrev] [--] [...]
>
>  DESCRIPTION
>  ---
> @@ -137,6 +138,10 @@ a space) at the start of each line:
> option forces paths to be output relative to the project
> top directory.
>
> +--recurse-submodules::
> +   Recursively calls ls-files on each submodule in the repository.
> +   Currently there is only support for the --cached mode.
> +
>  --abbrev[=]::
> Instead of showing the full 40-byte hexadecimal object
> lines, show only a partial prefix.
> diff --git a/builtin/ls-files.c b/builtin/ls-files.c
> index 00ea91a..e0e5cf5 100644
> --- a/builtin/ls-files.c
> +++ b/builtin/ls-files.c
> @@ -14,6 +14,7 @@
>  #include "resolve-undo.h"
>  #include "string-list.h"
>  #include "pathspec.h"
> +#include "run-command.h"
>
>  static int abbrev;
>  static int show_deleted;
> @@ -28,8 +29,10 @@ static int show_valid_bit;
>  static int line_terminator = '\n';
>  static int debug_mode;
>  static int show_eol;
> +static int recurse_submodules;
>
>  static const char *prefix;
> +static const char *super_prefix;
>  static int max_prefix_len;
>  static int prefix_len;
>  static struct pathspec pathspec;
> @@ -68,6 +71,19 @@ static void write_eolinfo(const struct cache_entry *ce, 
> const char *path)
>  static void write_name(const char *name)
>  {
> /*
> +* Prepend the super_prefix to name to construct the full_name to be
> +* written.  'full_name' gets reused across output lines to minimize 
> the
> +* allocation churn.
> +*/

When doing these tricks with the allocation churn (i.e. we make it
hard to read and understand
for a reader, then we should do it completely, i.e. keep full_name in
the strbuf and
only do a strbuf_setlen to reset the buffer just a bit. With this
implementation we burden the
reader/user to understand how the memory is kept over multiple calls
to this function,
but we still do more work than expected). So either I'd not worry
about performance
and provide an 'obvious correct' implementation, with e.g. no static
here and we free the memory
correctly. Or you'd go the performance route, but then we'd usually
ask for numbers.
(How much faster is it; Does the trickyness trade off well to the
performance gain?)


> +   static struct strbuf full_name = STRBUF_INIT;
> +   if (super_prefix && *super_prefix) {

Why do we have to check twice here? Wouldn't just

if (super_prefix) {
...

be enough?


[PATCH v5 2/4] ls-files: optionally recurse into submodules

2016-09-28 Thread Brandon Williams
Allow ls-files to recognize submodules in order to retrieve a list of
files from a repository's submodules.  This is done by forking off a
process to recursively call ls-files on all submodules. Use top-level
--super-prefix option to pass a path to the submodule which it can
use to prepend to output or pathspec matching logic.

Signed-off-by: Brandon Williams 
---
 Documentation/git-ls-files.txt |   7 +-
 builtin/ls-files.c | 139 -
 git.c  |   2 +-
 t/t3007-ls-files-recurse-submodules.sh | 100 
 4 files changed, 208 insertions(+), 40 deletions(-)
 create mode 100755 t/t3007-ls-files-recurse-submodules.sh

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 0d933ac..446209e 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -18,7 +18,8 @@ SYNOPSIS
[--exclude-per-directory=]
[--exclude-standard]
[--error-unmatch] [--with-tree=]
-   [--full-name] [--abbrev] [--] [...]
+   [--full-name] [--recurse-submodules]
+   [--abbrev] [--] [...]
 
 DESCRIPTION
 ---
@@ -137,6 +138,10 @@ a space) at the start of each line:
option forces paths to be output relative to the project
top directory.
 
+--recurse-submodules::
+   Recursively calls ls-files on each submodule in the repository.
+   Currently there is only support for the --cached mode.
+
 --abbrev[=]::
Instead of showing the full 40-byte hexadecimal object
lines, show only a partial prefix.
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 00ea91a..e0e5cf5 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "string-list.h"
 #include "pathspec.h"
+#include "run-command.h"
 
 static int abbrev;
 static int show_deleted;
@@ -28,8 +29,10 @@ static int show_valid_bit;
 static int line_terminator = '\n';
 static int debug_mode;
 static int show_eol;
+static int recurse_submodules;
 
 static const char *prefix;
+static const char *super_prefix;
 static int max_prefix_len;
 static int prefix_len;
 static struct pathspec pathspec;
@@ -68,6 +71,19 @@ static void write_eolinfo(const struct cache_entry *ce, 
const char *path)
 static void write_name(const char *name)
 {
/*
+* Prepend the super_prefix to name to construct the full_name to be
+* written.  'full_name' gets reused across output lines to minimize the
+* allocation churn.
+*/
+   static struct strbuf full_name = STRBUF_INIT;
+   if (super_prefix && *super_prefix) {
+   strbuf_reset(_name);
+   strbuf_addstr(_name, super_prefix);
+   strbuf_addstr(_name, name);
+   name = full_name.buf;
+   }
+
+   /*
 * With "--full-name", prefix_len=0; this caller needs to pass
 * an empty string in that case (a NULL is good for "").
 */
@@ -152,55 +168,84 @@ static void show_killed_files(struct dir_struct *dir)
}
 }
 
+/**
+ * Recursively call ls-files on a submodule
+ */
+static void show_gitlink(const struct cache_entry *ce)
+{
+   struct child_process cp = CHILD_PROCESS_INIT;
+   int status;
+
+   argv_array_pushf(, "--super-prefix=%s%s/",
+super_prefix ? super_prefix : "",
+ce->name);
+   argv_array_push(, "ls-files");
+   argv_array_push(, "--recurse-submodules");
+
+   cp.git_cmd = 1;
+   cp.dir = ce->name;
+   status = run_command();
+   if (status)
+   exit(status);
+}
+
 static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 {
+   struct strbuf name = STRBUF_INIT;
int len = max_prefix_len;
+   if (super_prefix)
+   strbuf_addstr(, super_prefix);
+   strbuf_addstr(, ce->name);
 
if (len >= ce_namelen(ce))
die("git ls-files: internal error - cache entry not superset of 
prefix");
 
-   if (!match_pathspec(, ce->name, ce_namelen(ce),
-   len, ps_matched,
-   S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)))
-   return;
+   if (recurse_submodules && S_ISGITLINK(ce->ce_mode)) {
+   show_gitlink(ce);
+   } else if (match_pathspec(, name.buf, name.len,
+ len, ps_matched,
+ S_ISDIR(ce->ce_mode) ||
+ S_ISGITLINK(ce->ce_mode))) {
+   if (tag && *tag && show_valid_bit &&
+   (ce->ce_flags & CE_VALID)) {
+   static char alttag[4];
+   memcpy(alttag, tag, 3);
+   if (isalpha(tag[0]))
+   alttag[0] = tolower(tag[0]);
+   else if