Re: What's cooking in git.git (May 2017, #08; Mon, 29)

2017-05-30 Thread Daniel Ferreira (theiostream)
On Mon, May 29, 2017 at 3:23 AM, Junio C Hamano  wrote:
> * df/dir-iter-remove-subtree (2017-05-29) 5 commits
>  . remove_subtree(): reimplement using iterators
>  . dir_iterator: rewrite state machine model
>  . dir_iterator: refactor dir_iterator_advance
>  . remove_subtree(): test removing nested directories
>  . dir_iterator: add tests for dir_iterator API
>
>  Update the dir-iterator API and use it to reimplement
>  remove_subtree().
>
>  GSoC microproject.
>  Ejected as it conflicts with other topics in flight in a
>  non-trivial way.

I see this conflicts with Duy's
fa7e9c0c24637d6b041a2919a33956b68bfd0869 ("files-backend: make reflog
iterator go through per-worktree reflog", 2017-04-19) because his
commit creates a new dir_iterator whose NULL value means something
semantically. This would be perfectly OK with the old dir_iterator API
(where NULL was not a possible return value from dir_iterator_begin()
and could be "reserved" for this case), but will most probably
generate issues with the new API, where NULL can *also* mean we failed
to lstat() the directory we're trying to iterate over[1].

I'll try to address this issue playing with pu, but I'm just wondering
what would be the best way to send this upcoming not-based-on-master
patch to the list. Should I just send it normally and signal it
originates from pu rather than master?

Thanks,

[1]: 
https://public-inbox.org/git/1493226219-33423-1-git-send-email-bnm...@gmail.com/T/#m68286d783b5dfbad6921fbf012f685a629645c61


Implementation of sorted hashmap iteration

2017-05-15 Thread Daniel Ferreira
---
Hi there! When implementing a patch series to port git-add--interactive
from Perl to C[1] I ended up implementing this quirk to iterate through
a hashmap in the order elements were added to it. In the end, it did
not make it into the series but I figured I'd share it anyway if it
interests anyone.

[1]: 
https://public-inbox.org/git/1494907234-28903-1-git-send-email-bnm...@gmail.com/T/#t

-- Daniel.

 hashmap.c | 25 +
 hashmap.h | 12 
 2 files changed, 37 insertions(+)

diff --git a/hashmap.c b/hashmap.c
index 7d1044e..948576c 100644
--- a/hashmap.c
+++ b/hashmap.c
@@ -196,6 +196,7 @@ void hashmap_add(struct hashmap *map, void *entry)
unsigned int b = bucket(map, entry);

/* add entry */
+   ((struct hashmap_entry *) entry)->index = map->size;
((struct hashmap_entry *) entry)->next = map->table[b];
map->table[b] = entry;

@@ -254,6 +255,30 @@ void *hashmap_iter_next(struct hashmap_iter *iter)
}
 }

+void hashmap_iter_init_sorted(struct hashmap *map,
+   struct hashmap_iter_sorted *iter)
+{
+   hashmap_iter_init(map, (struct hashmap_iter *)iter);
+   iter->pos = 0;
+   iter->sorted_entries = xcalloc(map->size,
+   sizeof(struct hashmap_entry *));
+
+   struct hashmap_entry *ent;
+   while ((ent = hashmap_iter_next((struct hashmap_iter *)iter))) {
+   iter->sorted_entries[ent->index] = ent;
+   }
+}
+
+void *hashmap_iter_next_sorted(struct hashmap_iter_sorted *iter)
+{
+   return iter->sorted_entries[iter->pos++];
+}
+
+void hashmap_iter_free_sorted(struct hashmap_iter_sorted *iter)
+{
+   free(iter->sorted_entries);
+}
+
 struct pool_entry {
struct hashmap_entry ent;
size_t len;
diff --git a/hashmap.h b/hashmap.h
index de6022a..f2a5d36 100644
--- a/hashmap.h
+++ b/hashmap.h
@@ -30,6 +30,7 @@ static inline unsigned int sha1hash(const unsigned char *sha1)
 struct hashmap_entry {
struct hashmap_entry *next;
unsigned int hash;
+   unsigned int index;
 };

 typedef int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key,
@@ -48,6 +49,12 @@ struct hashmap_iter {
unsigned int tablepos;
 };

+struct hashmap_iter_sorted {
+   struct hashmap_iter base;
+   struct hashmap_entry **sorted_entries;
+   unsigned int pos;
+};
+
 /* hashmap functions */

 extern void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
@@ -112,6 +119,11 @@ static inline void *hashmap_iter_first(struct hashmap *map,
return hashmap_iter_next(iter);
 }

+extern void hashmap_iter_init_sorted(struct hashmap *map,
+   struct hashmap_iter_sorted *iter);
+extern void *hashmap_iter_next_sorted(struct hashmap_iter_sorted *iter);
+extern void hashmap_iter_free_sorted(struct hashmap_iter_sorted *iter);
+
 /* string interning */

 extern const void *memintern(const void *data, size_t len);
--
2.7.4 (Apple Git-66)



[PATCH v2 3/4] add--helper: implement interactive status command

2017-05-15 Thread Daniel Ferreira
Implement add--interactive's status command in the add--helper builtin.
It prints a numstat comparing changed files between a) the worktree and
the index; b) the index and the HEAD.

To do so, we use run_diff_index() and run_diff_files() to get changed
files, use the diffstat API on them to get the numstat and use a
combination of a hashmap and qsort() to print the result in
O(n) + O(n lg n) complexity.

This is the first add--interactive command implemented in C of those
anticipated by 6cbc52d ("add--helper: create builtin helper for
interactive add", 2017-05-15).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 builtin/add--helper.c | 279 ++
 1 file changed, 279 insertions(+)

diff --git a/builtin/add--helper.c b/builtin/add--helper.c
index 6a97f0e..f6d35bf 100644
--- a/builtin/add--helper.c
+++ b/builtin/add--helper.c
@@ -1,6 +1,285 @@
 #include "builtin.h"
+#include "color.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "hashmap.h"
+#include "revision.h"
+
+#define HEADER_INDENT "  "
+
+enum collection_phase {
+   WORKTREE,
+   INDEX
+};
+
+struct file_stat {
+   struct hashmap_entry ent;
+   struct {
+   uintmax_t added, deleted;
+   } index, worktree;
+   char name[FLEX_ARRAY];
+};
+
+struct collection_status {
+   enum collection_phase phase;
+
+   const char *reference;
+   struct pathspec pathspec;
+
+   struct hashmap file_map;
+};
+
+static int use_color = -1;
+enum color_add_i {
+   COLOR_PROMPT,
+   COLOR_HEADER,
+   COLOR_HELP,
+   COLOR_ERROR
+};
+
+static char colors[][COLOR_MAXLEN] = {
+   GIT_COLOR_BOLD_BLUE, /* Prompt */
+   GIT_COLOR_BOLD,  /* Header */
+   GIT_COLOR_BOLD_RED,  /* Help */
+   GIT_COLOR_BOLD_RED   /* Error */
+};
+
+static const char *get_color(enum color_add_i ix)
+{
+   if (want_color(use_color))
+   return colors[ix];
+   return "";
+}
+
+static int parse_color_slot(const char *slot)
+{
+   if (!strcasecmp(slot, "prompt"))
+   return COLOR_PROMPT;
+   if (!strcasecmp(slot, "header"))
+   return COLOR_HEADER;
+   if (!strcasecmp(slot, "help"))
+   return COLOR_HELP;
+   if (!strcasecmp(slot, "error"))
+   return COLOR_ERROR;
+
+   return -1;
+}
+
+static int add_i_config(const char *var,
+   const char *value, void *cbdata)
+{
+   const char *name;
+
+   if (!strcmp(var, "color.interactive")) {
+   use_color = git_config_colorbool(var, value);
+   return 0;
+   }
+
+   if (skip_prefix(var, "color.interactive", )) {
+   int slot = parse_color_slot(name);
+   if (slot < 0)
+   return 0;
+   if (!value)
+   return config_error_nonbool(var);
+   return color_parse(value, colors[slot]);
+   }
+
+   return git_default_config(var, value, cbdata);
+}
+
+static int hash_cmp(const void *entry, const void *entry_or_key,
+ const void *keydata)
+{
+   const struct file_stat *e1 = entry, *e2 = entry_or_key;
+   const char *name = keydata ? keydata : e2->name;
+
+   return strcmp(e1->name, name);
+}
+
+static int alphabetical_cmp(const void *a, const void *b)
+{
+   struct file_stat *f1 = *((struct file_stat **)a);
+   struct file_stat *f2 = *((struct file_stat **)b);
+
+   return strcmp(f1->name, f2->name);
+}
+
+static void collect_changes_cb(struct diff_queue_struct *q,
+struct diff_options *options,
+void *data)
+{
+   struct collection_status *s = data;
+   struct diffstat_t stat = { 0 };
+   int i;
+
+   if (!q->nr)
+   return;
+
+   for (i = 0; i < q->nr; i++) {
+   struct diff_filepair *p;
+   p = q->queue[i];
+   diff_flush_stat(p, options, );
+   }
+
+   for (i = 0; i < stat.nr; i++) {
+   struct file_stat *entry;
+   const char *name = stat.files[i]->name;
+   unsigned int hash = strhash(name);
+
+   entry = hashmap_get_from_hash(>file_map, hash, name);
+   if (!entry) {
+   FLEX_ALLOC_STR(entry, name, name);
+   hashmap_entry_init(entry, hash);
+   hashmap_add(>file_map, entry);
+   }
+
+   if (s->phase == WORKTREE) {
+   entry->worktree.added = stat.files[i]->added;
+   entry->worktree.deleted = stat.files[i]->deleted;
+   } else if (s->phase == INDEX) {
+  

[PATCH v2 4/4] add--interactive: use add-interactive--helper for status_cmd

2017-05-15 Thread Daniel Ferreira
Call the newly introduced git-add-interactive--helper builtin on
status_cmd() instead of relying on git-add--interactive's Perl
functions to build print the numstat.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 git-add--interactive.perl | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/git-add--interactive.perl b/git-add--interactive.perl
index 709a5f6..8c192bf 100755
--- a/git-add--interactive.perl
+++ b/git-add--interactive.perl
@@ -632,9 +632,7 @@ EOF
 }
 
 sub status_cmd {
-   list_and_choose({ LIST_ONLY => 1, HEADER => $status_head },
-   list_modified());
-   print "\n";
+   system(qw(git add--helper --status));
 }
 
 sub say_n_paths {
-- 
2.7.4 (Apple Git-66)



[PATCH v2 1/4] diff: export diffstat interface

2017-05-15 Thread Daniel Ferreira
Make the diffstat interface (namely, the diffstat_t struct and
diff_flush_stat) no longer be internal to diff.c and allow it to be used
by other parts of git.

This is helpful for code that may want to easily extract information
from files using the diff machinery, while flushing it differently from
how the show_* functions used by diff_flush() do it. One example is the
builtin implementation of git-add--interactive's status.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 diff.c | 17 +
 diff.h | 19 ++-
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/diff.c b/diff.c
index 74283d9..7c073c2 100644
--- a/diff.c
+++ b/diff.c
@@ -1460,21 +1460,6 @@ static char *pprint_rename(const char *a, const char *b)
return strbuf_detach(, NULL);
 }
 
-struct diffstat_t {
-   int nr;
-   int alloc;
-   struct diffstat_file {
-   char *from_name;
-   char *name;
-   char *print_name;
-   unsigned is_unmerged:1;
-   unsigned is_binary:1;
-   unsigned is_renamed:1;
-   unsigned is_interesting:1;
-   uintmax_t added, deleted;
-   } **files;
-};
-
 static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
  const char *name_a,
  const char *name_b)
@@ -4310,7 +4295,7 @@ static void diff_flush_patch(struct diff_filepair *p, 
struct diff_options *o)
run_diff(p, o);
 }
 
-static void diff_flush_stat(struct diff_filepair *p, struct diff_options *o,
+void diff_flush_stat(struct diff_filepair *p, struct diff_options *o,
struct diffstat_t *diffstat)
 {
if (diff_unmodified_pair(p))
diff --git a/diff.h b/diff.h
index 5be1ee7..4cdc72d 100644
--- a/diff.h
+++ b/diff.h
@@ -188,6 +188,21 @@ struct diff_options {
int diff_path_counter;
 };
 
+struct diffstat_t {
+   int nr;
+   int alloc;
+   struct diffstat_file {
+   char *from_name;
+   char *name;
+   char *print_name;
+   unsigned is_unmerged:1;
+   unsigned is_binary:1;
+   unsigned is_renamed:1;
+   unsigned is_interesting:1;
+   uintmax_t added, deleted;
+   } **files;
+};
+
 enum color_diff {
DIFF_RESET = 0,
DIFF_CONTEXT = 1,
@@ -206,7 +221,6 @@ const char *diff_get_color(int diff_use_color, enum 
color_diff ix);
 
 const char *diff_line_prefix(struct diff_options *);
 
-
 extern const char mime_boundary_leader[];
 
 extern struct combine_diff_path *diff_tree_paths(
@@ -262,6 +276,9 @@ extern void diff_change(struct diff_options *,
 
 extern struct diff_filepair *diff_unmerge(struct diff_options *, const char 
*path);
 
+void diff_flush_stat(struct diff_filepair *p, struct diff_options *o,
+   struct diffstat_t *diffstat);
+
 #define DIFF_SETUP_REVERSE 1
 #define DIFF_SETUP_USE_CACHE   2
 #define DIFF_SETUP_USE_SIZE_CACHE  4
-- 
2.7.4 (Apple Git-66)



[PATCH v2 2/4] add--helper: create builtin helper for interactive add

2017-05-15 Thread Daniel Ferreira
Create a builtin helper for git-add--interactive, which right now is not
able to do anything.

This is the first step in an effort to convert git-add--interactive.perl
to a C builtin, in search for better portability, expressibility and
performance (specially on non-POSIX systems like Windows).

Additionally, an eventual complete port of git-add--interactive would
remove the last "big" Git script to have Perl as a dependency, allowing
most Git users to have a NOPERL build running without big losses.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 .gitignore| 1 +
 Makefile  | 1 +
 builtin.h | 1 +
 builtin/add--helper.c | 6 ++
 git.c | 1 +
 5 files changed, 10 insertions(+)
 create mode 100644 builtin/add--helper.c

diff --git a/.gitignore b/.gitignore
index 833ef3b..11cec05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@
 /git
 /git-add
 /git-add--interactive
+/git-add--helper
 /git-am
 /git-annotate
 /git-apply
diff --git a/Makefile b/Makefile
index e35542e..b7f49b6 100644
--- a/Makefile
+++ b/Makefile
@@ -873,6 +873,7 @@ LIB_OBJS += xdiff-interface.o
 LIB_OBJS += zlib.o
 
 BUILTIN_OBJS += builtin/add.o
+BUILTIN_OBJS += builtin/add--helper.o
 BUILTIN_OBJS += builtin/am.o
 BUILTIN_OBJS += builtin/annotate.o
 BUILTIN_OBJS += builtin/apply.o
diff --git a/builtin.h b/builtin.h
index 9e4a898..85b4c55 100644
--- a/builtin.h
+++ b/builtin.h
@@ -30,6 +30,7 @@ extern int textconv_object(const char *path, unsigned mode, 
const struct object_
 extern int is_builtin(const char *s);
 
 extern int cmd_add(int argc, const char **argv, const char *prefix);
+extern int cmd_add__helper(int argc, const char **argv, const char *prefix);
 extern int cmd_am(int argc, const char **argv, const char *prefix);
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
 extern int cmd_apply(int argc, const char **argv, const char *prefix);
diff --git a/builtin/add--helper.c b/builtin/add--helper.c
new file mode 100644
index 000..6a97f0e
--- /dev/null
+++ b/builtin/add--helper.c
@@ -0,0 +1,6 @@
+#include "builtin.h"
+
+int cmd_add__helper(int argc, const char **argv, const char *prefix)
+{
+   return 0;
+}
diff --git a/git.c b/git.c
index 8ff44f0..47ee257 100644
--- a/git.c
+++ b/git.c
@@ -391,6 +391,7 @@ static int run_builtin(struct cmd_struct *p, int argc, 
const char **argv)
 
 static struct cmd_struct commands[] = {
{ "add", cmd_add, RUN_SETUP | NEED_WORK_TREE },
+   { "add--helper", cmd_add__helper, RUN_SETUP | NEED_WORK_TREE },
{ "am", cmd_am, RUN_SETUP | NEED_WORK_TREE },
{ "annotate", cmd_annotate, RUN_SETUP },
{ "apply", cmd_apply, RUN_SETUP_GENTLY },
-- 
2.7.4 (Apple Git-66)



[PATCH v2 0/4] Port git-add--interactive.perl:status_cmd to C

2017-05-15 Thread Daniel Ferreira
This is the second version of a patch series to start porting
git-add--interactive from Perl to C.

Series:
v1: 
https://public-inbox.org/git/1494009820-2090-1-git-send-email-bnm...@gmail.com/

Travis CI build: https://travis-ci.org/theiostream/git/builds/232669894

I have applied most of the changes suggested by Johannes and Ævar (thank
you!). The one exception was Johannes' request to make this a WIP patch
series with a sort-of "design" of what's next to come. I did not do
this because I still have no clue...! I'll begin experimenting on
update_cmd() to see if I gain some insight on this issue that I could
bring to this series. I do think this would be a good idea, though! :)

-- Daniel.

Daniel Ferreira (4):
  diff: export diffstat interface
  add--helper: create builtin helper for interactive add
  add--helper: implement interactive status command
  add--interactive: use add-interactive--helper for status_cmd

 .gitignore|   1 +
 Makefile  |   1 +
 builtin.h |   1 +
 builtin/add--helper.c | 285 ++
 diff.c|  17 +--
 diff.h|  19 +++-
 git-add--interactive.perl |   4 +-
 git.c |   1 +
 8 files changed, 309 insertions(+), 20 deletions(-)
 create mode 100644 builtin/add--helper.c

--
2.7.4 (Apple Git-66)



Re: [PATCH 0/3] Port git-add--interactive.perl:status_cmd to C

2017-05-05 Thread Daniel Ferreira (theiostream)
On Fri, May 5, 2017 at 7:38 PM, Johannes Schindelin
 wrote:
> But maybe you want to keep the naming a little more consistent with the
> Perl script, e.g. instead of calling the function `print_modified()` call
> it already `list()` (and rename it later to `list_and_choose()` once you
> have taught it to ask for a choice)?

Actually, I named it print_modified() because I anticipated there
would be no list_and_choose() equivalent in C. I don't think the
builtin should have the interactive menu + modified list + untracked
list being handled by one function. Rather, I think a saner way to go
on with it would be to create functions like print_untracked();
choose_from_input(); print_menu() etc.

This is still pure speculation from what I've written until now and
from the roadmap I have in my head (I do not know how writing code
from now on will actually be), but I have a hunch that
list_and_choose() is already convoluted enough in Perl; in C it might
become a monster.

> Thank you for this pleasant read!

Thank *you* for the quick and thorough review :)

-- Daniel.


Re: [PATCH 2/3] add--interactive: add builtin helper for interactive add

2017-05-05 Thread Daniel Ferreira (theiostream)
On Fri, May 5, 2017 at 7:30 PM, Johannes Schindelin
 wrote:
>> +static int git_add_interactive_config(const char *var,
>
> Not git_add_interactive__helper_config()? ;-)

I don't get if you mean this ironically (because of the verbosity) or
if you do think this would be a good name ;P

>> + for (i = 0; i < q->nr; i++) {
>> + struct diff_filepair *p;
>> + p = q->queue[i];
>> + diff_flush_stat(p, options, );
>> + }
>> +
>> + for (i = 0; i < stat.nr; i++) {
>> + int file_index = s->file_count;
>> + for (j = 0; j < s->file_count; j++) {
>> + if (!strcmp(s->files[j].path, stat.files[i]->name)) {
>> + file_index = j;
>> + break;
>> + }
>> + }
>
> So basically, this is looking up in a list whether we saw the file in
> question already, and the reason we have to do that is that we run the
> entire shebang twice, once with the worktree and once with the index.
>
> I wonder whether it would not make sense to switch away s->files from a
> list to a hashmap.
> [...]
> BTW in the first pass, we pretty much know that we only get unique names,
> so the entire lookup is unnecessary and will just increase the time
> complexity from O(n) to O(n^2). So let's avoid that.
>
> By moving to a hashmap, you can even get the second phase down to an
> expected O(n).

How would you go about implementing that hashmap (i.e. what should be
the hash)? Does Git have any interface for it, or is there any example
I can look after in the codebase?

> Apart from using PATH_MAX bytes for most likely only short names: [...]

If not PATH_MAX, what should I go for? Make it a strbuf? I tend to
believe keeping that on the stack would be simpler and more optimal.

> Now that I read this and remember that only WORKTREE and INDEX are handled
> in the callback function: is there actually a use for the NONE enum value?
> I.e. is current_mode read out in any other context than the callback
> function? If there is no other read, then the NONE enum value is just
> confusing.

I just preferred to have a declared non-handled value than leave
something undefined behind. I felt it might avoid headaches in the
future with petty segfaults.

> Why not collapse all three functions into one? It is not like they are
> totally unrelated nor super-long.

To me it is a matter of personal preference to keep them separate. If
there is, however, any technical or project-style-related reason to
get them together, I'll certainly do it.

>> +static void print_modified(void)
>> +{
>> + int i;
>> + struct add_interactive_status s;
>> + const char *modified_fmt = _("%12s %12s %s");
>
> We cannot really translate that...

Apparently, we can. Ævar covered that in his reply.

>> + printf(ADD_INTERACTIVE_HEADER_INDENT);
>> + color_fprintf(stdout, header_color, modified_fmt, _("staged"),
>> + _("unstaged"), _("path"));
>
> I think these _() need to become N_().

I cannot find any call to N_() outside of Perl code. What should that
even do differently?

>> +static void status_cmd(void)
>> +{
>> + print_modified();
>> +}
>
> As long as this function really only calls another function with no
> parameters, let's just drop it. We can call print_modified() instead of
> status_cmd() just as easily.

I thought calling status_cmd() would make that more clear, but I agree
-- the options already make it clear enough,

I agree with all points I did not directly address. And thank you for
the review :)

-- Daniel.


[PATCH 0/3] Port git-add--interactive.perl:status_cmd to C

2017-05-05 Thread Daniel Ferreira
Hm, it looks like my GSoC project won't be in the Git organization,
although I'm still interested in going for this so I guess I'll send
patches to implement my proposal anyway (although certainly in a slower
pace than I would if on the program).

This series introduces git-add-interactive--helper (or should it be
called git-add--interactive--helper?) as a builtin capable of doing
what the Perl script's status_cmd() would do.

I wish this patch series could have been smaller, although I don't
think it would make sense to bring add-interactive "subunits" smaller
than a command to the helper, and status_cmd (aside from probably
add_untracked_cmd) was the simplest one to do after all -- and still
required ~250 lines on the new builtin.

Another regret I had was not being able to retire any code from the
Perl script yet (and will likely not be able to do so until all
commands have been ported), but that is not such a big thing after
all.

As for the new builtin, I think the color handling code is pretty
straightforward. In fact, it was practically copied from places like
clean.c or diff.c (which makes me wonder if some of that code could
be generalized to avoid duplication). The same goes for the pretty
simple option parsing code.

Bigger issues seem to arise when dealing with getting the numstat.
While (as Junio anticipated on an RFC) some tasks like getting the
actual diff and splitting it may require making the "diff
machinery" write to a temporary file that we will read and do things
with, I think it would be weird to do that for parsing a simple
numstat from it. My first instinct was to create something like
show_numstat_interactive() or something on diff.c (analogous to the
other show_* functions). Doing that, however, would stumble upon
another issue: we would not be able to print both a file's diff
against the index (obtained from run_diff_index) and against the
worktree (obtained from run_diff_files) in that function. The solution
I came up with was to export the diffstat interface from diff.c into
the world and allow our new builtin to use that and build our status_cmd
output. Unless this breaks some rule of Git's API design, the result
seems pretty reasonable to me.

Travis CI build: https://travis-ci.org/theiostream/git/builds/229232202

Looking forward for feedback,
Daniel.

Daniel Ferreira (3):
  diff: export diffstat interface
  add--interactive: add builtin helper for interactive add
  add--interactive: use add-interactive--helper for status_cmd

 .gitignore|   1 +
 Makefile  |   1 +
 builtin.h |   1 +
 builtin/add-interactive--helper.c | 258 ++
 diff.c|  17 +--
 diff.h|  19 ++-
 git-add--interactive.perl |   4 +-
 git.c |   1 +
 8 files changed, 282 insertions(+), 20 deletions(-)
 create mode 100644 builtin/add-interactive--helper.c

--
2.7.4 (Apple Git-66)



[PATCH 3/3] add--interactive: use add-interactive--helper for status_cmd

2017-05-05 Thread Daniel Ferreira
Call the newly introduced git-add-interactive--helper builtin on
status_cmd() instead of relying on git-add--interactive's Perl
functions to build print the numstat.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 git-add--interactive.perl | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/git-add--interactive.perl b/git-add--interactive.perl
index 709a5f6..8617462 100755
--- a/git-add--interactive.perl
+++ b/git-add--interactive.perl
@@ -632,9 +632,7 @@ EOF
 }
 
 sub status_cmd {
-   list_and_choose({ LIST_ONLY => 1, HEADER => $status_head },
-   list_modified());
-   print "\n";
+   system(qw(git add-interactive--helper --status));
 }
 
 sub say_n_paths {
-- 
2.7.4 (Apple Git-66)



[PATCH 2/3] add--interactive: add builtin helper for interactive add

2017-05-05 Thread Daniel Ferreira
Create a builtin helper for git-add--interactive, which right now is
only able to reproduce git-add--interactive.perl's status_cmd()
function, providing a summarized diff numstat to the user.

This is the first step in an effort to convert git-add--interactive.perl
to a C builtin, in search for better portability, expressibility and
performance (specially on non-POSIX systems like Windows).

Additionally, an eventual complete port of git-add--interactive would
remove the last "big" Git script to have Perl as a dependency, allowing
most Git users to have a NOPERL build running without big losses.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 .gitignore|   1 +
 Makefile  |   1 +
 builtin.h |   1 +
 builtin/add-interactive--helper.c | 258 ++
 git.c |   1 +
 5 files changed, 262 insertions(+)
 create mode 100644 builtin/add-interactive--helper.c

diff --git a/.gitignore b/.gitignore
index 833ef3b..0d6cfe4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@
 /git
 /git-add
 /git-add--interactive
+/git-add-interactive--helper
 /git-am
 /git-annotate
 /git-apply
diff --git a/Makefile b/Makefile
index e35542e..842fce2 100644
--- a/Makefile
+++ b/Makefile
@@ -873,6 +873,7 @@ LIB_OBJS += xdiff-interface.o
 LIB_OBJS += zlib.o
 
 BUILTIN_OBJS += builtin/add.o
+BUILTIN_OBJS += builtin/add-interactive--helper.o
 BUILTIN_OBJS += builtin/am.o
 BUILTIN_OBJS += builtin/annotate.o
 BUILTIN_OBJS += builtin/apply.o
diff --git a/builtin.h b/builtin.h
index 9e4a898..3d6a0ab 100644
--- a/builtin.h
+++ b/builtin.h
@@ -30,6 +30,7 @@ extern int textconv_object(const char *path, unsigned mode, 
const struct object_
 extern int is_builtin(const char *s);
 
 extern int cmd_add(int argc, const char **argv, const char *prefix);
+extern int cmd_add_interactive__helper(int argc, const char **argv, const char 
*prefix);
 extern int cmd_am(int argc, const char **argv, const char *prefix);
 extern int cmd_annotate(int argc, const char **argv, const char *prefix);
 extern int cmd_apply(int argc, const char **argv, const char *prefix);
diff --git a/builtin/add-interactive--helper.c 
b/builtin/add-interactive--helper.c
new file mode 100644
index 000..97ca1b3
--- /dev/null
+++ b/builtin/add-interactive--helper.c
@@ -0,0 +1,258 @@
+#include "builtin.h"
+#include "color.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "revision.h"
+
+#define ADD_INTERACTIVE_HEADER_INDENT "  "
+
+enum add_interactive_collection_mode {
+   COLLECTION_MODE_NONE,
+   COLLECTION_MODE_WORKTREE,
+   COLLECTION_MODE_INDEX
+};
+
+struct add_interactive_file_status {
+   int selected;
+
+   char path[PATH_MAX];
+
+   int lines_added_index;
+   int lines_deleted_index;
+   int lines_added_worktree;
+   int lines_deleted_worktree;
+};
+
+struct add_interactive_status {
+   enum add_interactive_collection_mode current_mode;
+
+   const char *reference;
+   struct pathspec pathspec;
+
+   int file_count;
+   struct add_interactive_file_status *files;
+};
+
+static int add_interactive_use_color = -1;
+enum color_add_interactive {
+   ADD_INTERACTIVE_PROMPT,
+   ADD_INTERACTIVE_HEADER,
+   ADD_INTERACTIVE_HELP,
+   ADD_INTERACTIVE_ERROR
+};
+
+static char add_interactive_colors[][COLOR_MAXLEN] = {
+   GIT_COLOR_BOLD_BLUE, /* Prompt */
+   GIT_COLOR_BOLD,  /* Header */
+   GIT_COLOR_BOLD_RED,  /* Help */
+   GIT_COLOR_BOLD_RED   /* Error */
+};
+
+static const char *add_interactive_get_color(enum color_add_interactive ix)
+{
+   if (want_color(add_interactive_use_color))
+   return add_interactive_colors[ix];
+   return "";
+}
+
+static int parse_add_interactive_color_slot(const char *slot)
+{
+   if (!strcasecmp(slot, "prompt"))
+   return ADD_INTERACTIVE_PROMPT;
+   if (!strcasecmp(slot, "header"))
+   return ADD_INTERACTIVE_HEADER;
+   if (!strcasecmp(slot, "help"))
+   return ADD_INTERACTIVE_HELP;
+   if (!strcasecmp(slot, "error"))
+   return ADD_INTERACTIVE_ERROR;
+
+   return -1;
+}
+
+static int git_add_interactive_config(const char *var,
+   const char *value, void *cbdata)
+{
+   const char *name;
+
+   if (!strcmp(var, "color.interactive")) {
+   add_interactive_use_color = git_config_colorbool(var, value);
+   return 0;
+   }
+
+   if (skip_prefix(var, "color.interactive", )) {
+   int slot = parse_add_interactive_color_slot(name);
+   if (slot < 0)
+   return 0;
+   if (!value)
+   return config_error_nonbool(var);
+   

[PATCH 1/3] diff: export diffstat interface

2017-05-05 Thread Daniel Ferreira
Make the diffstat interface (namely, the diffstat_t struct and
diff_flush_stat) no longer be internal to diff.c and allow it to be used
by other parts of git.

This is helpful for code that may want to easily extract information
from files using the diff machinery, while flushing it differently from
how the show_* functions used by diff_flush() do it.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 diff.c | 17 +
 diff.h | 19 ++-
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/diff.c b/diff.c
index 74283d9..7c073c2 100644
--- a/diff.c
+++ b/diff.c
@@ -1460,21 +1460,6 @@ static char *pprint_rename(const char *a, const char *b)
return strbuf_detach(, NULL);
 }
 
-struct diffstat_t {
-   int nr;
-   int alloc;
-   struct diffstat_file {
-   char *from_name;
-   char *name;
-   char *print_name;
-   unsigned is_unmerged:1;
-   unsigned is_binary:1;
-   unsigned is_renamed:1;
-   unsigned is_interesting:1;
-   uintmax_t added, deleted;
-   } **files;
-};
-
 static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
  const char *name_a,
  const char *name_b)
@@ -4310,7 +4295,7 @@ static void diff_flush_patch(struct diff_filepair *p, 
struct diff_options *o)
run_diff(p, o);
 }
 
-static void diff_flush_stat(struct diff_filepair *p, struct diff_options *o,
+void diff_flush_stat(struct diff_filepair *p, struct diff_options *o,
struct diffstat_t *diffstat)
 {
if (diff_unmodified_pair(p))
diff --git a/diff.h b/diff.h
index 5be1ee7..4cdc72d 100644
--- a/diff.h
+++ b/diff.h
@@ -188,6 +188,21 @@ struct diff_options {
int diff_path_counter;
 };
 
+struct diffstat_t {
+   int nr;
+   int alloc;
+   struct diffstat_file {
+   char *from_name;
+   char *name;
+   char *print_name;
+   unsigned is_unmerged:1;
+   unsigned is_binary:1;
+   unsigned is_renamed:1;
+   unsigned is_interesting:1;
+   uintmax_t added, deleted;
+   } **files;
+};
+
 enum color_diff {
DIFF_RESET = 0,
DIFF_CONTEXT = 1,
@@ -206,7 +221,6 @@ const char *diff_get_color(int diff_use_color, enum 
color_diff ix);
 
 const char *diff_line_prefix(struct diff_options *);
 
-
 extern const char mime_boundary_leader[];
 
 extern struct combine_diff_path *diff_tree_paths(
@@ -262,6 +276,9 @@ extern void diff_change(struct diff_options *,
 
 extern struct diff_filepair *diff_unmerge(struct diff_options *, const char 
*path);
 
+void diff_flush_stat(struct diff_filepair *p, struct diff_options *o,
+   struct diffstat_t *diffstat);
+
 #define DIFF_SETUP_REVERSE 1
 #define DIFF_SETUP_USE_CACHE   2
 #define DIFF_SETUP_USE_SIZE_CACHE  4
-- 
2.7.4 (Apple Git-66)



[RFC] [GSoC] Port git-add--interactive.perl:status_cmd to C

2017-05-01 Thread Daniel Ferreira (theiostream)
Hey there,

So, in the GSoC proposal I sent about porting git-add--interactive to
C[1], I expected I would be able to do a couple of small patches to
git-add to familiarize myself with the Git API and have a better clue
of how the porting process would go by. Due to the unexpected size my
microproject ended up taking (though in a good way, I think) I chose
to focus on that and ended up not sending those anticipated patches.
However, I *did* have time to study how I'd go for the port more
closely, and I'd like some comments on what you think of it. Be I
accepted or not into GSoC some days from now, I think the topic might
interest others that might want to tackle this task.

As I went through the code, I grew to believe that a good way to make
a step-by-step conversion might be to get add--interactive "commands"
(as in status_cmd, update_cmd, add_untracked_cmd etc.) ported one at a
time over a set of patch series. For each series, one X_cmd Perl
subroutine would become a one-line call to
"git-add-interactive--helper X " (a builtin). This would make
understanding a function as complex as list_and_choose() to being
ported to C way easier: for each command, a new functionality would be
added on-demand instead of having the whole function sent over at
once. Afterwards, a final series would likely set up the "interactive
main loop" in C and retire the Perl script.

I tried to apply this philosophy to porting `status_cmd` to C, and
easily got most of the configuration/color logic into C by mimicking
what builtins like clean.c do.

The next issue I came up with (and do not yet have a solution) is how
reproduce git-add--interactive's list_modified(). The Perl script runs
git-diff-index and git-diff-files, does some clever string parsing of
their outputs and "merges" the output of both in a single table to
display the number of added/removed lines in a given file BOTH between
HEAD+index and index+tree.

Reproducing either of these comparisons "natively" would simply
require running run_diff_index() or run_diff_files() with
DIFF_FORMAT_NUMSTAT and tweaking diff_flush() to format appropriately
for the "interactive--add case". Furthermore, the current output
prefix options would manage to cover the eventual case of `update_cmd`
selecting files. To output both comparisons, however, I can't come up
with any simple solution. Would you have any ideas?

-- Daniel.

[1]: 
https://public-inbox.org/git/caea2_rjef4vjgcaux8a1kwh1-vxllmv1--vjf9wieqof+gv...@mail.gmail.com/


[PATCH v11 3/5] dir_iterator: refactor dir_iterator_advance

2017-04-26 Thread Daniel Ferreira
Factor out reusable helpers out of dir_iterator_advance(). Make
dir_iterator_advance()'s code more legible and allow some behavior to
be reusable.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 66 ++
 1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..d168cb2 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,44 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };
 
+static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static int pop_dir_level(struct dir_iterator_int *iter)
+{
+   return --iter->levels_nr;
+}
+
+static int adjust_iterator_data(struct dir_iterator_int *iter,
+   struct dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -84,11 +122,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * over; now prepare to iterate into
 * it.
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +138,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
 
continue;
@@ -129,7 +163,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, 
strerror(errno));
 
level->dir = NULL;
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
break;
}
@@ -138,23 +172,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
continue;
 
strbuf_addstr(>base.path, de->d_name);
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
-   if (errno != ENOENT)
-   warning("error reading path '%s': %s",
-   iter->base.path.buf,
-   strerror(errno));
-   continue;
-   }
 
-   /*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
-*/
-   iter->base.relative_path =
-   iter->base.path.buf + 
iter->levels[0].prefix_len;
-   iter->base.basename =
-   iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
+   if (adjust_iterator_data(iter, level))
+   continue;
 
return ITER_OK;
}
-- 
2.7.4 (Apple Git-66)



[PATCH v11 5/5] remove_subtree(): reimplement using iterators

2017-04-26 Thread Daniel Ferreira
Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 42 --
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/entry.c b/entry.c
index d2b512d..a939432 100644
--- a/entry.c
+++ b/entry.c
@@ -3,6 +3,8 @@
 #include "dir.h"
 #include "streaming.h"
 #include "submodule.h"
+#include "iterator.h"
+#include "dir-iterator.h"
 
 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -45,33 +47,21 @@ static void create_directories(const char *path, int 
path_len,
free(buf);
 }
 
-static void remove_subtree(struct strbuf *path)
+static void remove_subtree(const char *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
-   die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
+   struct dir_iterator *diter = dir_iterator_begin(path,
+   DIR_ITERATOR_POST_ORDER_TRAVERSAL | DIR_ITERATOR_LIST_ROOT_DIR);
+   if (!diter) {
+   die_errno("cannot remove path '%s'", path);
+   }
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", diter->path.buf);
+   } else if (unlink(diter->path.buf))
+   die_errno("cannot unlink '%s'", diter->path.buf);
}
-   closedir(dir);
-   if (rmdir(path->buf))
-   die_errno("cannot rmdir '%s'", path->buf);
 }
 
 static int create_file(const char *path, unsigned int mode)
@@ -312,7 +302,7 @@ int checkout_entry(struct cache_entry *ce,
return 0;
if (!state->force)
return error("%s is a directory", path.buf);
-   remove_subtree();
+   remove_subtree(path.buf);
} else if (unlink(path.buf))
return error_errno("unable to unlink old '%s'", 
path.buf);
} else if (state->not_new)
-- 
2.7.4 (Apple Git-66)



[PATCH v11 0/5] [GSoC] remove_subtree(): reimplement using iterators

2017-04-26 Thread Daniel Ferreira
This is the eleventh version of a patch series that implements the GSoC
microproject of converting a recursive call to readdir() to use
dir_iterator.

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
v3: 
https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t
v4: 
https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnm...@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a
v5: 
https://public-inbox.org/git/1490844730-47634-1-git-send-email-bnm...@gmail.com/T/#m2323f15e45de699f2e09364f40a62e17047cf453
v6: 
https://public-inbox.org/git/1491107726-21504-1-git-send-email-bnm...@gmail.com/T/#t
v7: 
https://public-inbox.org/git/1491163388-41255-1-git-send-email-bnm...@gmail.com/T/#t
v8: 
https://public-inbox.org/git/a60b2ed6-2b99-b134-05af-7c8492a69...@alum.mit.edu/T/#t
v9: 
https://public-inbox.org/git/cagz79kabrs0sfavrv4mn7-mvk+8qmpkpjmd55zpq+a14zzy...@mail.gmail.com/T/#me8988b7dd4adbc4ea24946ccb24fc1cf7baf44e3
v10: 
https://public-inbox.org/git/xmqqk26fahjn@gitster.mtv.corp.google.com/T/#m3071006ec67457adf69578b37f55b625d0e7fed7

Travis CI build: https://travis-ci.org/theiostream/git/builds/226079792

Okay, in this version I factored in Junio's request for a test rename
to t0066, and most of Michael's suggestions from the last review. I'm
sorry for the delay on this one.

Instead of either removing the iterate root dir feature or return NULL
as its basename, I chose to get the real_path() out of the dir we are
iterating over and get the basename of that, to avoid the "/." or "/.."
issues. I think this is actually less complex than the NULL solution in
terms of code that would end up needing to be written, and I think the
root dir feature is handy to have on dir-iterator.

As for the suggestion to put strerror() on the test, I feared for the
message compatibility across platforms (since we actually check which
errno code we got). If you could give me a guarantee that this is not a
problem and you think it'd be worthy of yet another series, I'm up for
it.

As for Junio's concern for a rebase issue on the test script, the
reason for it was that the same file is used across two tests, so it
seemed unnecessary to recreate the file within each of them.

Daniel Ferreira (5):
  dir_iterator: add tests for dir_iterator API
  remove_subtree(): test removing nested directories
  dir_iterator: refactor dir_iterator_advance
  dir_iterator: rewrite state machine model
  remove_subtree(): reimplement using iterators

 Makefile|   1 +
 dir-iterator.c  | 244 
 dir-iterator.h  |  35 --
 entry.c |  42 +++
 refs/files-backend.c|  15 ++-
 t/helper/.gitignore |   1 +
 t/helper/test-dir-iterator.c|  53 +
 t/t0066-dir-iterator.sh | 121 
 t/t2000-checkout-cache-clash.sh |  11 ++
 9 files changed, 416 insertions(+), 107 deletions(-)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0066-dir-iterator.sh

--
2.7.4 (Apple Git-66)



[PATCH v11 1/5] dir_iterator: add tests for dir_iterator API

2017-04-26 Thread Daniel Ferreira
Create t/helper/test-dir-iterator.c, which prints relevant information
about a directory tree iterated over with dir_iterator.

Create t/t0066-dir-iterator.sh, which tests that dir_iterator does
iterate through a whole directory tree.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 Makefile |  1 +
 t/helper/.gitignore  |  1 +
 t/helper/test-dir-iterator.c | 30 
 t/t0066-dir-iterator.sh  | 55 
 4 files changed, 87 insertions(+)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0066-dir-iterator.sh

diff --git a/Makefile b/Makefile
index eb1a1a7..a669e43 100644
--- a/Makefile
+++ b/Makefile
@@ -614,6 +614,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
+TEST_PROGRAMS_NEED_X += test-dir-iterator
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
 TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index acd5db1..60adab5 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -3,6 +3,7 @@
 /test-config
 /test-date
 /test-delta
+/test-dir-iterator
 /test-dump-cache-tree
 /test-dump-split-index
 /test-dump-untracked-cache
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644
index 000..a7d1470
--- /dev/null
+++ b/t/helper/test-dir-iterator.c
@@ -0,0 +1,30 @@
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+int cmd_main(int argc, const char **argv)
+{
+   struct strbuf path = STRBUF_INIT;
+   struct dir_iterator *diter;
+
+   if (argc < 2)
+   die("BUG: test-dir-iterator needs one argument");
+
+   strbuf_add(, argv[1], strlen(argv[1]));
+
+   diter = dir_iterator_begin(path.buf);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode))
+   printf("[d] ");
+   else if (S_ISREG(diter->st.st_mode))
+   printf("[f] ");
+   else
+   printf("[?] ");
+
+   printf("(%s) [%s] %s\n", diter->relative_path, diter->basename, 
diter->path.buf);
+   }
+
+   return 0;
+}
diff --git a/t/t0066-dir-iterator.sh b/t/t0066-dir-iterator.sh
new file mode 100755
index 000..46e5ce5
--- /dev/null
+++ b/t/t0066-dir-iterator.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+test_description='Test directory iteration.'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+   mkdir -p dir &&
+   mkdir -p dir/a/b/c/ &&
+   >dir/b &&
+   >dir/c &&
+   mkdir -p dir/d/e/d/ &&
+   >dir/a/b/c/d &&
+   >dir/a/e &&
+   >dir/d/e/d/a &&
+
+   mkdir -p dir2/a/b/c/ &&
+   >dir2/a/b/c/d
+'
+
+test_expect_success 'dir-iterator should iterate through all files' '
+   cat >expect-sorted-output <<-\EOF &&
+   [d] (a) [a] ./dir/a
+   [d] (a/b) [b] ./dir/a/b
+   [d] (a/b/c) [c] ./dir/a/b/c
+   [d] (d) [d] ./dir/d
+   [d] (d/e) [e] ./dir/d/e
+   [d] (d/e/d) [d] ./dir/d/e/d
+   [f] (a/b/c/d) [d] ./dir/a/b/c/d
+   [f] (a/e) [e] ./dir/a/e
+   [f] (b) [b] ./dir/b
+   [f] (c) [c] ./dir/c
+   [f] (d/e/d/a) [a] ./dir/d/e/d/a
+   EOF
+
+   test-dir-iterator ./dir >out &&
+   sort ./actual-pre-order-sorted-output &&
+
+   test_cmp expect-sorted-output actual-pre-order-sorted-output
+'
+
+test_expect_success 'dir-iterator should list files in the correct order' '
+   cat >expect-pre-order-output <<-\EOF &&
+   [d] (a) [a] ./dir2/a
+   [d] (a/b) [b] ./dir2/a/b
+   [d] (a/b/c) [c] ./dir2/a/b/c
+   [f] (a/b/c/d) [d] ./dir2/a/b/c/d
+   EOF
+
+   test-dir-iterator ./dir2 >actual-pre-order-output &&
+
+   test_cmp expect-pre-order-output actual-pre-order-output
+'
+
+test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v11 4/5] dir_iterator: rewrite state machine model

2017-04-26 Thread Daniel Ferreira
Perform a rewrite of dir_iterator_advance(). dir_iterator has
ceased to rely on a combination of level.initialized and level.dir_state
state variables and now only tracks the state with level.dir_state,
which simplifies the iterator mechanism, makes the code easier to follow
and eases additions of new features to the iterator.

Make dir_iterator_begin() attempt to lstat() the path it receives, and
return NULL and an appropriate errno if it fails or if the passed path
was not a directory.

Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18). This is useful for
recursively removing a directory and calling rmdir() on a directory only
after all of its contents have been wiped.

Add an option for dir_iterator to also iterate over the initial
directory (the one passed to dir_iterator_begin()).

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned new features to be enabled. The new default behavior
(i.e. flags set to 0) does not iterate over directories. Flag
DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
iterates over the initial directory. These flags do not conflict with
each other and may be used simultaneously.

Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
the flags parameter introduced, as well as handle the case in which it
fails to open the directory.

Improve t/t0066-dir-iterator.sh and t/helper/test-dir-iterator.c to
test "post-order" and "iterate-over-root" modes.

Michael Haggerty contributed with the design of the new
dir_iterator_advance() implementation, the code for
t/helper/test-dir-iterator's option parser and numerous reviews that
gradually shaped this code to its current form.

Signed-off-by: Michael Haggerty <mhag...@alum.mit.edu>
Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c   | 212 ++-
 dir-iterator.h   |  35 +--
 refs/files-backend.c |  15 ++-
 t/helper/test-dir-iterator.c |  31 ++-
 t/t0066-dir-iterator.sh  | 104 +
 5 files changed, 299 insertions(+), 98 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index d168cb2..fba8f49 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"
 
 struct dir_iterator_level {
-   int initialized;
-
DIR *dir;
 
/*
@@ -20,9 +18,15 @@ struct dir_iterator_level {
 * iteration and also iterated into):
 */
enum {
-   DIR_STATE_ITER,
-   DIR_STATE_RECURSE
+   DIR_STATE_PUSHED,
+   DIR_STATE_PRE_ITERATION,
+   DIR_STATE_ITERATING,
+   DIR_STATE_POST_ITERATION,
+   DIR_STATE_EXHAUSTED
} dir_state;
+
+   /* The stat structure for the directory this level represents. */
+   struct stat st;
 };
 
 /*
@@ -48,15 +52,23 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };
 
-static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+static void push_dir_level(struct dir_iterator_int *iter, struct stat *st)
 {
-   level->dir_state = DIR_STATE_RECURSE;
+   struct dir_iterator_level *level;
+
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
   iter->levels_alloc);
+
+   /* Push a new level */
level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   level->dir = NULL;
+   level->dir_state = DIR_STATE_PUSHED;
+   level->st = *st;
 }
 
 static int pop_dir_level(struct dir_iterator_int *iter)
@@ -67,7 +79,9 @@ static int pop_dir_level(struct dir_iterator_int *iter)
 static int adjust_iterator_data(struct dir_iterator_int *iter,
struct dir_iterator_level *level)
 {
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (level->dir_state != DIR_STATE_ITERATING) {
+   iter->base.st = level->st;
+   } else if (lstat(iter->base.path.buf, >base.st) < 0) {
if (errno != ENOENT)
warning("error reading path '%s': %s",
iter->base.path.buf,
@@ -76,18 +90,52 @@ static int adjust_iterator_data(struct dir_iterator_int 
*iter,
}
 
/*
-* We have to set these each time because
-* the path strbuf might h

[PATCH v11 2/5] remove_subtree(): test removing nested directories

2017-04-26 Thread Daniel Ferreira
Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with 
--prefix' '
git checkout-index -a -f --prefix=there/
 '
 
+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+   echo content >path &&
+   git update-index --add path &&
+   rm path &&
+   mkdir -p path/with/nested/paths &&
+   echo content >path/file1 &&
+   echo content >path/with/nested/paths/file2 &&
+   git checkout-index -f -a &&
+   test ! -d path
+'
+
 test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v10 5/5] remove_subtree(): reimplement using iterators

2017-04-19 Thread Daniel Ferreira
Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 42 --
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/entry.c b/entry.c
index d2b512d..a939432 100644
--- a/entry.c
+++ b/entry.c
@@ -3,6 +3,8 @@
 #include "dir.h"
 #include "streaming.h"
 #include "submodule.h"
+#include "iterator.h"
+#include "dir-iterator.h"
 
 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -45,33 +47,21 @@ static void create_directories(const char *path, int 
path_len,
free(buf);
 }
 
-static void remove_subtree(struct strbuf *path)
+static void remove_subtree(const char *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
-   die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
+   struct dir_iterator *diter = dir_iterator_begin(path,
+   DIR_ITERATOR_POST_ORDER_TRAVERSAL | DIR_ITERATOR_LIST_ROOT_DIR);
+   if (!diter) {
+   die_errno("cannot remove path '%s'", path);
+   }
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", diter->path.buf);
+   } else if (unlink(diter->path.buf))
+   die_errno("cannot unlink '%s'", diter->path.buf);
}
-   closedir(dir);
-   if (rmdir(path->buf))
-   die_errno("cannot rmdir '%s'", path->buf);
 }
 
 static int create_file(const char *path, unsigned int mode)
@@ -312,7 +302,7 @@ int checkout_entry(struct cache_entry *ce,
return 0;
if (!state->force)
return error("%s is a directory", path.buf);
-   remove_subtree();
+   remove_subtree(path.buf);
} else if (unlink(path.buf))
return error_errno("unable to unlink old '%s'", 
path.buf);
} else if (state->not_new)
-- 
2.7.4 (Apple Git-66)



[PATCH v10 4/5] dir_iterator: rewrite state machine model

2017-04-19 Thread Daniel Ferreira
Perform a rewrite of dir_iterator_advance(). dir_iterator has
ceased to rely on a combination of level.initialized and level.dir_state
state variables and now only tracks the state with level.dir_state,
which simplifies the iterator mechanism, makes the code easier to follow
and eases additions of new features to the iterator.

Make dir_iterator_begin() attempt to lstat() the path it receives, and
return NULL and an appropriate errno if it fails or if the passed path
was not a directory.

Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18). This is useful for
recursively removing a directory and calling rmdir() on a directory only
after all of its contents have been wiped.

Add an option for dir_iterator to also iterate over the initial
directory (the one passed to dir_iterator_begin()).

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned new features to be enabled. The new default behavior
(i.e. flags set to 0) does not iterate over directories. Flag
DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
iterates over the initial directory. These flags do not conflict with
each other and may be used simultaneously.

Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
the flags parameter introduced, as well as handle the case in which it
fails to open the directory.

Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to
test "post-order" and "iterate-over-root" modes.

Michael Haggerty contributed with the design of the new
dir_iterator_advance() implementation, the code for
t/helper/test-dir-iterator's option parser and numerous reviews that
gradually shaped this code to its current form.

Signed-off-by: Michael Haggerty <mhag...@alum.mit.edu>
Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c   | 220 ++-
 dir-iterator.h   |  35 +--
 refs/files-backend.c |  51 ++
 t/helper/test-dir-iterator.c |  31 +-
 t/t0065-dir-iterator.sh  |  94 ++
 5 files changed, 316 insertions(+), 115 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index d168cb2..3c20e0e 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"
 
 struct dir_iterator_level {
-   int initialized;
-
DIR *dir;
 
/*
@@ -20,9 +18,15 @@ struct dir_iterator_level {
 * iteration and also iterated into):
 */
enum {
-   DIR_STATE_ITER,
-   DIR_STATE_RECURSE
+   DIR_STATE_PUSHED,
+   DIR_STATE_PRE_ITERATION,
+   DIR_STATE_ITERATING,
+   DIR_STATE_POST_ITERATION,
+   DIR_STATE_EXHAUSTED
} dir_state;
+
+   /* The stat structure for the directory this level represents. */
+   struct stat st;
 };
 
 /*
@@ -48,15 +52,23 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };
 
-static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+static void push_dir_level(struct dir_iterator_int *iter, struct stat *st)
 {
-   level->dir_state = DIR_STATE_RECURSE;
+   struct dir_iterator_level *level;
+
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
   iter->levels_alloc);
+
+   /* Push a new level */
level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   level->dir = NULL;
+   level->dir_state = DIR_STATE_PUSHED;
+   level->st = *st;
 }
 
 static int pop_dir_level(struct dir_iterator_int *iter)
@@ -67,7 +79,11 @@ static int pop_dir_level(struct dir_iterator_int *iter)
 static int adjust_iterator_data(struct dir_iterator_int *iter,
struct dir_iterator_level *level)
 {
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   char *last_path_component;
+
+   if (level->dir_state != DIR_STATE_ITERATING) {
+   iter->base.st = level->st;
+   } else if (lstat(iter->base.path.buf, >base.st) < 0) {
if (errno != ENOENT)
warning("error reading path '%s': %s",
iter->base.path.buf,
@@ -76,18 +92,48 @@ static int adjust_iterator_data(struct dir_iterator_int 
*iter,
}
 
/*
-* We have to set these 

[PATCH v10 2/5] remove_subtree(): test removing nested directories

2017-04-19 Thread Daniel Ferreira
Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with 
--prefix' '
git checkout-index -a -f --prefix=there/
 '
 
+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+   echo content >path &&
+   git update-index --add path &&
+   rm path &&
+   mkdir -p path/with/nested/paths &&
+   echo content >path/file1 &&
+   echo content >path/with/nested/paths/file2 &&
+   git checkout-index -f -a &&
+   test ! -d path
+'
+
 test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v10 3/5] dir_iterator: refactor dir_iterator_advance

2017-04-19 Thread Daniel Ferreira
Factor out reusable helpers out of dir_iterator_advance(). Make
dir_iterator_advance()'s code more legible and allow some behavior to
be reusable.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 66 ++
 1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..d168cb2 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,44 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };
 
+static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static int pop_dir_level(struct dir_iterator_int *iter)
+{
+   return --iter->levels_nr;
+}
+
+static int adjust_iterator_data(struct dir_iterator_int *iter,
+   struct dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -84,11 +122,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * over; now prepare to iterate into
 * it.
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +138,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
 
continue;
@@ -129,7 +163,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, 
strerror(errno));
 
level->dir = NULL;
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
break;
}
@@ -138,23 +172,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
continue;
 
strbuf_addstr(>base.path, de->d_name);
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
-   if (errno != ENOENT)
-   warning("error reading path '%s': %s",
-   iter->base.path.buf,
-   strerror(errno));
-   continue;
-   }
 
-   /*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
-*/
-   iter->base.relative_path =
-   iter->base.path.buf + 
iter->levels[0].prefix_len;
-   iter->base.basename =
-   iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
+   if (adjust_iterator_data(iter, level))
+   continue;
 
return ITER_OK;
}
-- 
2.7.4 (Apple Git-66)



[PATCH v10 1/5] dir_iterator: add tests for dir_iterator API

2017-04-19 Thread Daniel Ferreira
Create t/helper/test-dir-iterator.c, which prints relevant information
about a directory tree iterated over with dir_iterator.

Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
iterate through a whole directory tree.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 Makefile |  1 +
 t/helper/.gitignore  |  1 +
 t/helper/test-dir-iterator.c | 30 
 t/t0065-dir-iterator.sh  | 55 
 4 files changed, 87 insertions(+)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

diff --git a/Makefile b/Makefile
index d595ea3..a5c1ac0 100644
--- a/Makefile
+++ b/Makefile
@@ -614,6 +614,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
+TEST_PROGRAMS_NEED_X += test-dir-iterator
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
 TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 758ed2e..a7d74d3 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -3,6 +3,7 @@
 /test-config
 /test-date
 /test-delta
+/test-dir-iterator
 /test-dump-cache-tree
 /test-dump-split-index
 /test-dump-untracked-cache
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644
index 000..a7d1470
--- /dev/null
+++ b/t/helper/test-dir-iterator.c
@@ -0,0 +1,30 @@
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+int cmd_main(int argc, const char **argv)
+{
+   struct strbuf path = STRBUF_INIT;
+   struct dir_iterator *diter;
+
+   if (argc < 2)
+   die("BUG: test-dir-iterator needs one argument");
+
+   strbuf_add(, argv[1], strlen(argv[1]));
+
+   diter = dir_iterator_begin(path.buf);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode))
+   printf("[d] ");
+   else if (S_ISREG(diter->st.st_mode))
+   printf("[f] ");
+   else
+   printf("[?] ");
+
+   printf("(%s) [%s] %s\n", diter->relative_path, diter->basename, 
diter->path.buf);
+   }
+
+   return 0;
+}
diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
new file mode 100755
index 000..46e5ce5
--- /dev/null
+++ b/t/t0065-dir-iterator.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+test_description='Test directory iteration.'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+   mkdir -p dir &&
+   mkdir -p dir/a/b/c/ &&
+   >dir/b &&
+   >dir/c &&
+   mkdir -p dir/d/e/d/ &&
+   >dir/a/b/c/d &&
+   >dir/a/e &&
+   >dir/d/e/d/a &&
+
+   mkdir -p dir2/a/b/c/ &&
+   >dir2/a/b/c/d
+'
+
+test_expect_success 'dir-iterator should iterate through all files' '
+   cat >expect-sorted-output <<-\EOF &&
+   [d] (a) [a] ./dir/a
+   [d] (a/b) [b] ./dir/a/b
+   [d] (a/b/c) [c] ./dir/a/b/c
+   [d] (d) [d] ./dir/d
+   [d] (d/e) [e] ./dir/d/e
+   [d] (d/e/d) [d] ./dir/d/e/d
+   [f] (a/b/c/d) [d] ./dir/a/b/c/d
+   [f] (a/e) [e] ./dir/a/e
+   [f] (b) [b] ./dir/b
+   [f] (c) [c] ./dir/c
+   [f] (d/e/d/a) [a] ./dir/d/e/d/a
+   EOF
+
+   test-dir-iterator ./dir >out &&
+   sort ./actual-pre-order-sorted-output &&
+
+   test_cmp expect-sorted-output actual-pre-order-sorted-output
+'
+
+test_expect_success 'dir-iterator should list files in the correct order' '
+   cat >expect-pre-order-output <<-\EOF &&
+   [d] (a) [a] ./dir2/a
+   [d] (a/b) [b] ./dir2/a/b
+   [d] (a/b/c) [c] ./dir2/a/b/c
+   [f] (a/b/c/d) [d] ./dir2/a/b/c/d
+   EOF
+
+   test-dir-iterator ./dir2 >actual-pre-order-output &&
+
+   test_cmp expect-pre-order-output actual-pre-order-output
+'
+
+test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v10 0/5] [GSoC] remove_subtree(): reimplement using iterators

2017-04-19 Thread Daniel Ferreira
This is the tenth version of a patch series that implements the GSoC
microproject of converting a recursive call to readdir() to use
dir_iterator.

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
v3: 
https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t
v4: 
https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnm...@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a
v5: 
https://public-inbox.org/git/1490844730-47634-1-git-send-email-bnm...@gmail.com/T/#m2323f15e45de699f2e09364f40a62e17047cf453
v6: 
https://public-inbox.org/git/1491107726-21504-1-git-send-email-bnm...@gmail.com/T/#t
v7: 
https://public-inbox.org/git/1491163388-41255-1-git-send-email-bnm...@gmail.com/T/#t
v8: 
https://public-inbox.org/git/a60b2ed6-2b99-b134-05af-7c8492a69...@alum.mit.edu/T/#t
v9: 
https://public-inbox.org/git/cagz79kabrs0sfavrv4mn7-mvk+8qmpkpjmd55zpq+a14zzy...@mail.gmail.com/T/#me8988b7dd4adbc4ea24946ccb24fc1cf7baf44e3

Travis CI build: https://travis-ci.org/theiostream/git/builds/223542902

I do not know if "queuing" meant I did not have to change this series
any further (specially after Stefan's "ok"), but anyway, this series
applies Junio's corrections back from v9, that were mostly regarding
commit messages or style. I hope I got them right.

The only point I was in doubt was about Michael's signoff. Actually, he
gave it not regarding the snippet for the new dir_iterator_advance()
logic, but to a small piece of actual code he wrote on the new dir
iterator test helper[1]. Thus I don't know whether this would require his
signoff to come before or after mine. Regardless, proper credit has
been given in the commit message, as suggested. In the end, I kept
his before mine, but I suppose that can be adjusted by Junio if
necessary.

I also didn't get whether I myself should have renamed t0065 to t0066
given the other queued patch. I kept it as t0065 since I figured it
would be weird for this patch as a unit to "skip" a number.

Once again, thanks for all the time invested in the reviews for this
patch.

[1]: 
https://public-inbox.org/git/1491163388-41255-1-git-send-email-bnm...@gmail.com/T/#m187b9e681e3369862ccc6083bbf6596cd2e19cd4

Daniel Ferreira (5):
  dir_iterator: add tests for dir_iterator API
  remove_subtree(): test removing nested directories
  dir_iterator: refactor dir_iterator_advance
  dir_iterator: rewrite state machine model
  remove_subtree(): reimplement using iterators

 Makefile|   1 +
 dir-iterator.c  | 252 +---
 dir-iterator.h  |  35 --
 entry.c |  42 +++
 refs/files-backend.c|  51 +---
 t/helper/.gitignore |   1 +
 t/helper/test-dir-iterator.c|  53 +
 t/t0065-dir-iterator.sh | 111 ++
 t/t2000-checkout-cache-clash.sh |  11 ++
 9 files changed, 433 insertions(+), 124 deletions(-)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

--
2.7.4 (Apple Git-66)



[PATCH v9 0/5] [GSoC] remove_subtree(): reimplement using iterators

2017-04-17 Thread Daniel Ferreira
This is the ninth version of a patch series that implements the GSoC
microproject of converting a recursive call to readdir() to use
dir_iterator.

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
v3: 
https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t
v4: 
https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnm...@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a
v5: 
https://public-inbox.org/git/1490844730-47634-1-git-send-email-bnm...@gmail.com/T/#m2323f15e45de699f2e09364f40a62e17047cf453
v6: 
https://public-inbox.org/git/1491107726-21504-1-git-send-email-bnm...@gmail.com/T/#t
v7: 
https://public-inbox.org/git/1491163388-41255-1-git-send-email-bnm...@gmail.com/T/#t
v8: 
https://public-inbox.org/git/a60b2ed6-2b99-b134-05af-7c8492a69...@alum.mit.edu/T/#t

Travis CI build: https://travis-ci.org/theiostream/git/builds/222919383

I think this is the closest to a final version we've ever gotten. I
followed all of Michael and Stefan's suggestions on top of v8, and with
Michael's endorsement made dir_iterator_begin() return NULL and set
errno appropriately in case of an error.

On second thought, maybe the extra code complexity required from
dir_iterator_begin()'s callers might be actually an advantage as
dir_iterator grows to tackle more complex dir traversing challenges on
Git. After all, we might want some special behavior depending on what
the given `path` is instead of always considering it valid and later
behaving as if it was an empty directory.

Thanks again for the reviews.

Daniel Ferreira (5):
  dir_iterator: add tests for dir_iterator API
  remove_subtree(): test removing nested directories
  dir_iterator: add helpers to dir_iterator_advance
  dir_iterator: refactor state machine model
  remove_subtree(): reimplement using iterators

 Makefile|   1 +
 dir-iterator.c  | 252 +---
 dir-iterator.h  |  35 --
 entry.c |  42 +++
 refs/files-backend.c|  51 +---
 t/helper/.gitignore |   1 +
 t/helper/test-dir-iterator.c|  53 +
 t/t0065-dir-iterator.sh | 113 ++
 t/t2000-checkout-cache-clash.sh |  11 ++
 9 files changed, 435 insertions(+), 124 deletions(-)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

--
2.7.4 (Apple Git-66)



[PATCH v9 1/5] dir_iterator: add tests for dir_iterator API

2017-04-17 Thread Daniel Ferreira
Create t/helper/test-dir-iterator.c, which prints relevant information
about a directory tree iterated over with dir_iterator.

Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
iterate through a whole directory tree.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 Makefile |  1 +
 t/helper/.gitignore  |  1 +
 t/helper/test-dir-iterator.c | 30 
 t/t0065-dir-iterator.sh  | 55 
 4 files changed, 87 insertions(+)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

diff --git a/Makefile b/Makefile
index d595ea3..a5c1ac0 100644
--- a/Makefile
+++ b/Makefile
@@ -614,6 +614,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
+TEST_PROGRAMS_NEED_X += test-dir-iterator
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
 TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 758ed2e..a7d74d3 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -3,6 +3,7 @@
 /test-config
 /test-date
 /test-delta
+/test-dir-iterator
 /test-dump-cache-tree
 /test-dump-split-index
 /test-dump-untracked-cache
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644
index 000..a7d1470
--- /dev/null
+++ b/t/helper/test-dir-iterator.c
@@ -0,0 +1,30 @@
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+int cmd_main(int argc, const char **argv)
+{
+   struct strbuf path = STRBUF_INIT;
+   struct dir_iterator *diter;
+
+   if (argc < 2)
+   die("BUG: test-dir-iterator needs one argument");
+
+   strbuf_add(, argv[1], strlen(argv[1]));
+
+   diter = dir_iterator_begin(path.buf);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode))
+   printf("[d] ");
+   else if (S_ISREG(diter->st.st_mode))
+   printf("[f] ");
+   else
+   printf("[?] ");
+
+   printf("(%s) [%s] %s\n", diter->relative_path, diter->basename, 
diter->path.buf);
+   }
+
+   return 0;
+}
diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
new file mode 100755
index 000..b6d76dd
--- /dev/null
+++ b/t/t0065-dir-iterator.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+test_description='Test directory iteration.'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+   mkdir -p dir &&
+   mkdir -p dir/a/b/c/ &&
+   >dir/b &&
+   >dir/c &&
+   mkdir -p dir/d/e/d/ &&
+   >dir/a/b/c/d &&
+   >dir/a/e &&
+   >dir/d/e/d/a &&
+
+   mkdir -p dir2/a/b/c/ &&
+   >dir2/a/b/c/d
+'
+
+cat >expect-sorted-output <<-\EOF &&
+[d] (a) [a] ./dir/a
+[d] (a/b) [b] ./dir/a/b
+[d] (a/b/c) [c] ./dir/a/b/c
+[d] (d) [d] ./dir/d
+[d] (d/e) [e] ./dir/d/e
+[d] (d/e/d) [d] ./dir/d/e/d
+[f] (a/b/c/d) [d] ./dir/a/b/c/d
+[f] (a/e) [e] ./dir/a/e
+[f] (b) [b] ./dir/b
+[f] (c) [c] ./dir/c
+[f] (d/e/d/a) [a] ./dir/d/e/d/a
+EOF
+
+test_expect_success 'dir-iterator should iterate through all files' '
+   test-dir-iterator ./dir >out &&
+   sort ./actual-pre-order-sorted-output &&
+
+   test_cmp expect-sorted-output actual-pre-order-sorted-output
+'
+
+cat >expect-pre-order-output <<-\EOF &&
+[d] (a) [a] ./dir2/a
+[d] (a/b) [b] ./dir2/a/b
+[d] (a/b/c) [c] ./dir2/a/b/c
+[f] (a/b/c/d) [d] ./dir2/a/b/c/d
+EOF
+
+test_expect_success 'dir-iterator should list files in the correct order' '
+   test-dir-iterator ./dir2 >actual-pre-order-output &&
+
+   test_cmp expect-pre-order-output actual-pre-order-output
+'
+
+test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v9 3/5] dir_iterator: add helpers to dir_iterator_advance

2017-04-17 Thread Daniel Ferreira
Create helpers to dir_iterator_advance(). Make dir_iterator_advance()'s
code more legible and allow some behavior to be reusable.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 65 +-
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..9e073a0 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,43 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };
 
+static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static int pop_dir_level(struct dir_iterator_int *iter)
+{
+   return --iter->levels_nr;
+}
+
+static int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -84,11 +121,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * over; now prepare to iterate into
 * it.
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +137,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
 
continue;
@@ -129,7 +162,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, 
strerror(errno));
 
level->dir = NULL;
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
break;
}
@@ -138,23 +171,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
continue;
 
strbuf_addstr(>base.path, de->d_name);
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
-   if (errno != ENOENT)
-   warning("error reading path '%s': %s",
-   iter->base.path.buf,
-   strerror(errno));
-   continue;
-   }
 
-   /*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
-*/
-   iter->base.relative_path =
-   iter->base.path.buf + 
iter->levels[0].prefix_len;
-   iter->base.basename =
-   iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
+   if (set_iterator_data(iter, level))
+   continue;
 
return ITER_OK;
}
-- 
2.7.4 (Apple Git-66)



[PATCH v9 2/5] remove_subtree(): test removing nested directories

2017-04-17 Thread Daniel Ferreira
Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with 
--prefix' '
git checkout-index -a -f --prefix=there/
 '
 
+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+   echo content >path &&
+   git update-index --add path &&
+   rm path &&
+   mkdir -p path/with/nested/paths &&
+   echo content >path/file1 &&
+   echo content >path/with/nested/paths/file2 &&
+   git checkout-index -f -a &&
+   test ! -d path
+'
+
 test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v9 5/5] remove_subtree(): reimplement using iterators

2017-04-17 Thread Daniel Ferreira
Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 42 --
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/entry.c b/entry.c
index d2b512d..a939432 100644
--- a/entry.c
+++ b/entry.c
@@ -3,6 +3,8 @@
 #include "dir.h"
 #include "streaming.h"
 #include "submodule.h"
+#include "iterator.h"
+#include "dir-iterator.h"
 
 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -45,33 +47,21 @@ static void create_directories(const char *path, int 
path_len,
free(buf);
 }
 
-static void remove_subtree(struct strbuf *path)
+static void remove_subtree(const char *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
-   die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
+   struct dir_iterator *diter = dir_iterator_begin(path,
+   DIR_ITERATOR_POST_ORDER_TRAVERSAL | DIR_ITERATOR_LIST_ROOT_DIR);
+   if (!diter) {
+   die_errno("cannot remove path '%s'", path);
+   }
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", diter->path.buf);
+   } else if (unlink(diter->path.buf))
+   die_errno("cannot unlink '%s'", diter->path.buf);
}
-   closedir(dir);
-   if (rmdir(path->buf))
-   die_errno("cannot rmdir '%s'", path->buf);
 }
 
 static int create_file(const char *path, unsigned int mode)
@@ -312,7 +302,7 @@ int checkout_entry(struct cache_entry *ce,
return 0;
if (!state->force)
return error("%s is a directory", path.buf);
-   remove_subtree();
+   remove_subtree(path.buf);
} else if (unlink(path.buf))
return error_errno("unable to unlink old '%s'", 
path.buf);
} else if (state->not_new)
-- 
2.7.4 (Apple Git-66)



[PATCH v9 4/5] dir_iterator: refactor state machine model

2017-04-17 Thread Daniel Ferreira
Perform major refactor of dir_iterator_advance(). dir_iterator has
ceased to rely on a convoluted state machine mechanism of two loops and
two state variables (level.initialized and level.dir_state). This serves
to ease comprehension of the iterator mechanism and ease addition of new
features to the iterator.

Make dir_iterator_begin() attempt to lstat() the path it receives, and
return NULL and an appropriate errno if it fails or if the passed path
was not a directory.

Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18). This is useful for
recursively removing a directory and calling rmdir() on a directory only
after all of its contents have been wiped.

Add an option for the dir_iterator API to iterate over the root
directory (the one it was initialized with) as well.

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned new features to be enabled. The new default behavior
(i.e. flags set to 0) does not iterate over directories. Flag
DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
iterates over the root directory. These flags do not conflict with each
other and may be used simultaneously.

Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
the flags parameter introduced, as well as handle the case in which it
fails to open the directory.

Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to
test "post-order" and "iterate-over-root" modes.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
Signed-off-by: Michael Haggerty <mhag...@alum.mit.edu>
---
 dir-iterator.c   | 223 ++-
 dir-iterator.h   |  35 +--
 refs/files-backend.c |  51 ++
 t/helper/test-dir-iterator.c |  31 +-
 t/t0065-dir-iterator.sh  |  66 -
 5 files changed, 305 insertions(+), 101 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 9e073a0..9394797 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"
 
 struct dir_iterator_level {
-   int initialized;
-
DIR *dir;
 
/*
@@ -20,9 +18,15 @@ struct dir_iterator_level {
 * iteration and also iterated into):
 */
enum {
-   DIR_STATE_ITER,
-   DIR_STATE_RECURSE
+   DIR_STATE_PUSHED,
+   DIR_STATE_PRE_ITERATION,
+   DIR_STATE_ITERATING,
+   DIR_STATE_POST_ITERATION,
+   DIR_STATE_EXHAUSTED
} dir_state;
+
+   /* The stat structure for the directory this level represents. */
+   struct stat st;
 };
 
 /*
@@ -48,15 +52,23 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };
 
-static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+static void push_dir_level(struct dir_iterator_int *iter, struct stat *st)
 {
-   level->dir_state = DIR_STATE_RECURSE;
+   struct dir_iterator_level *level;
+
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
   iter->levels_alloc);
+
+   /* Push a new level */
level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   level->dir = NULL;
+   level->dir_state = DIR_STATE_PUSHED;
+   level->st = *st;
 }
 
 static int pop_dir_level(struct dir_iterator_int *iter)
@@ -64,9 +76,14 @@ static int pop_dir_level(struct dir_iterator_int *iter)
return --iter->levels_nr;
 }
 
-static int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+static int set_iterator_data(struct dir_iterator_int *iter,
+   struct dir_iterator_level *level)
 {
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   char *last_path_component;
+
+   if (level->dir_state != DIR_STATE_ITERATING) {
+   iter->base.st = level->st;
+   } else if (lstat(iter->base.path.buf, >base.st) < 0) {
if (errno != ENOENT)
warning("error reading path '%s': %s",
iter->base.path.buf,
@@ -75,18 +92,48 @@ static int set_iterator_data(struct dir_iterator_int *iter, 
struct dir_iterator_
}
 
/*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
+* Check if w

Re: [PATCH v8 0/5] [GSoC] remove_subtree(): reimplement using iterators

2017-04-12 Thread Daniel Ferreira (theiostream)
Hey there! I'm sorry for bothering you, but any chance you might have
overlooked this patch for a review? (I'm just not familiar with how
long patches usually take to be reviewed, and since I always got an
answer within two days of sending it I wondered if you may have just
not noticed it.)

-- Daniel.

On Wed, Apr 5, 2017 at 10:39 PM, Daniel Ferreira <bnm...@gmail.com> wrote:
> This is the seventh version of a patch series that implements the GSoC
> microproject of converting a recursive call to readdir() to use dir_iterator.
>
> v1: 
> https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
> v2: 
> https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
> v3: 
> https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t
> v4: 
> https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnm...@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a
> v5: 
> https://public-inbox.org/git/1490844730-47634-1-git-send-email-bnm...@gmail.com/T/#m2323f15e45de699f2e09364f40a62e17047cf453
> v6: 
> https://public-inbox.org/git/1491107726-21504-1-git-send-email-bnm...@gmail.com/T/#t
> v7: 
> https://public-inbox.org/git/1491163388-41255-1-git-send-email-bnm...@gmail.com/T/#t
>
> Travis CI build: https://travis-ci.org/theiostream/git/jobs/21982
>
> In this version, I applied pretty much all suggestions Michael and Stefan had
> for the patch.
>
> Michael, regarding the patch you sent for parsing the arguments on the
> test-dir-iterator helper, I did not squash because it'd generate too many
> merge conflicts. I just preferred add your code manually. Let me know how I
> can properly credit you for it.
>
> The only "controversial" part of this code is how I ended up caching the 
> result
> of lstat() on the dir_iterator_level struct. Having to handle the case of the
> root directory ended up making set_iterator_data() way more complicated 
> (having
> to handle the case of level->stat not being set by push_dir_level()), as well
> as required the introduction of st_is_set member in the level struct. This 
> issue
> would be solved if we could lstat() the root dir on dir_iterator_begin() and
> possibly return an error there. Regardless, I appreciate other suggestions to
> make this less complex.
>
> Daniel Ferreira (5):
>   dir_iterator: add tests for dir_iterator API
>   remove_subtree(): test removing nested directories
>   dir_iterator: add helpers to dir_iterator_advance
>   dir_iterator: refactor state machine model
>   remove_subtree(): reimplement using iterators
>
>  Makefile|   1 +
>  dir-iterator.c  | 247 
> +---
>  dir-iterator.h  |  35 --
>  entry.c |  39 +++
>  refs/files-backend.c|   2 +-
>  t/helper/.gitignore |   1 +
>  t/helper/test-dir-iterator.c|  48 
>  t/t0065-dir-iterator.sh |  93 +++
>  t/t2000-checkout-cache-clash.sh |  11 ++
>  9 files changed, 373 insertions(+), 104 deletions(-)
>  create mode 100644 t/helper/test-dir-iterator.c
>  create mode 100755 t/t0065-dir-iterator.sh
>
> --
> 2.7.4 (Apple Git-66)
>


[PATCH v8 2/5] remove_subtree(): test removing nested directories

2017-04-05 Thread Daniel Ferreira
Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with 
--prefix' '
git checkout-index -a -f --prefix=there/
 '
 
+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+   echo content >path &&
+   git update-index --add path &&
+   rm path &&
+   mkdir -p path/with/nested/paths &&
+   echo content >path/file1 &&
+   echo content >path/with/nested/paths/file2 &&
+   git checkout-index -f -a &&
+   test ! -d path
+'
+
 test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v8 4/5] dir_iterator: refactor state machine model

2017-04-05 Thread Daniel Ferreira
Perform major refactor of dir_iterator_advance(). dir_iterator has
ceased to rely on a convoluted state machine mechanism of two loops and
two state variables (level.initialized and level.dir_state). This serves
to ease comprehension of the iterator mechanism and ease addition of new
features to the iterator.

Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18). This is useful for
recursively removing a directory and calling rmdir() on a directory only
after all of its contents have been wiped.

Add an option for the dir_iterator API to iterate over the root
directory (the one it was initialized with) as well.

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned new features to be enabled. The new default behavior
(i.e. flags set to 0) does not iterate over directories. Flag
DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
iterates over the root directory. These flags do not conflict with each
other and may be used simultaneously.

Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
the flags parameter introduced.

Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to
test "post-order" and "iterate-over-root" modes.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c   | 216 ++-
 dir-iterator.h   |  35 +--
 refs/files-backend.c |   2 +-
 t/helper/test-dir-iterator.c |  27 +-
 t/t0065-dir-iterator.sh  |  44 -
 5 files changed, 245 insertions(+), 79 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 9e073a0..4c919d1 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"
 
 struct dir_iterator_level {
-   int initialized;
-
DIR *dir;
 
/*
@@ -20,9 +18,20 @@ struct dir_iterator_level {
 * iteration and also iterated into):
 */
enum {
-   DIR_STATE_ITER,
-   DIR_STATE_RECURSE
+   DIR_STATE_PUSHED,
+   DIR_STATE_PRE_ITERATION,
+   DIR_STATE_ITERATING,
+   DIR_STATE_POST_ITERATION,
+   DIR_STATE_EXHAUSTED
} dir_state;
+
+   /*
+* The stat structure for the directory this level represents.
+* It comes with a st_is_set flag which indicates whether it is,
+* in fact, set.
+*/
+   unsigned st_is_set : 1;
+   struct stat st;
 };
 
 /*
@@ -48,15 +57,23 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };
 
-static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+static void push_dir_level(struct dir_iterator_int *iter,
+   struct dir_iterator_level *level, struct stat *st)
 {
-   level->dir_state = DIR_STATE_RECURSE;
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
   iter->levels_alloc);
+
+   /* Push a new level */
level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   level->dir = NULL;
+   level->dir_state = DIR_STATE_PUSHED;
+   level->st_is_set = 1;
+   level->st = *st;
 }
 
 static int pop_dir_level(struct dir_iterator_int *iter)
@@ -64,29 +81,73 @@ static int pop_dir_level(struct dir_iterator_int *iter)
return --iter->levels_nr;
 }
 
-static int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+static int set_iterator_data(struct dir_iterator_int *iter,
+   struct dir_iterator_level *level)
 {
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   char *last_path_component;
+
+   if (level->dir_state != DIR_STATE_ITERATING && level->st_is_set) {
+   iter->base.st = level->st;
+   } else if (lstat(iter->base.path.buf, >base.st) < 0) {
if (errno != ENOENT)
warning("error reading path '%s': %s",
iter->base.path.buf,
strerror(errno));
+   else if (!level->st_is_set) {
+   /*
+* When we are iterating over the root directory, we
+* are forced to initialize the level stat here, since
+* we cannot lstat() on dir_iterato

[PATCH v8 3/5] dir_iterator: add helpers to dir_iterator_advance

2017-04-05 Thread Daniel Ferreira
Create helpers to dir_iterator_advance(). Make dir_iterator_advance()'s
code more legible and allow some behavior to be reusable.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 65 +-
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..9e073a0 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,43 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };
 
+static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static int pop_dir_level(struct dir_iterator_int *iter)
+{
+   return --iter->levels_nr;
+}
+
+static int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -84,11 +121,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * over; now prepare to iterate into
 * it.
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +137,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
 
continue;
@@ -129,7 +162,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, 
strerror(errno));
 
level->dir = NULL;
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
break;
}
@@ -138,23 +171,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
continue;
 
strbuf_addstr(>base.path, de->d_name);
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
-   if (errno != ENOENT)
-   warning("error reading path '%s': %s",
-   iter->base.path.buf,
-   strerror(errno));
-   continue;
-   }
 
-   /*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
-*/
-   iter->base.relative_path =
-   iter->base.path.buf + 
iter->levels[0].prefix_len;
-   iter->base.basename =
-   iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
+   if (set_iterator_data(iter, level))
+   continue;
 
return ITER_OK;
}
-- 
2.7.4 (Apple Git-66)



[PATCH v8 5/5] remove_subtree(): reimplement using iterators

2017-04-05 Thread Daniel Ferreira
Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 39 +--
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/entry.c b/entry.c
index d2b512d..d543ccf 100644
--- a/entry.c
+++ b/entry.c
@@ -3,6 +3,8 @@
 #include "dir.h"
 #include "streaming.h"
 #include "submodule.h"
+#include "iterator.h"
+#include "dir-iterator.h"
 
 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -45,33 +47,18 @@ static void create_directories(const char *path, int 
path_len,
free(buf);
 }
 
-static void remove_subtree(struct strbuf *path)
+static void remove_subtree(const char *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
-   die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
+   struct dir_iterator *diter = dir_iterator_begin(path,
+   DIR_ITERATOR_POST_ORDER_TRAVERSAL | DIR_ITERATOR_LIST_ROOT_DIR);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", diter->path.buf);
+   } else if (unlink(diter->path.buf))
+   die_errno("cannot unlink '%s'", diter->path.buf);
}
-   closedir(dir);
-   if (rmdir(path->buf))
-   die_errno("cannot rmdir '%s'", path->buf);
 }
 
 static int create_file(const char *path, unsigned int mode)
@@ -312,7 +299,7 @@ int checkout_entry(struct cache_entry *ce,
return 0;
if (!state->force)
return error("%s is a directory", path.buf);
-   remove_subtree();
+   remove_subtree(path.buf);
} else if (unlink(path.buf))
return error_errno("unable to unlink old '%s'", 
path.buf);
} else if (state->not_new)
-- 
2.7.4 (Apple Git-66)



[PATCH v8 1/5] dir_iterator: add tests for dir_iterator API

2017-04-05 Thread Daniel Ferreira
Create t/helper/test-dir-iterator.c, which prints relevant information
about a directory tree iterated over with dir_iterator.

Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
iterate through a whole directory tree.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 Makefile |  1 +
 t/helper/.gitignore  |  1 +
 t/helper/test-dir-iterator.c | 29 +++
 t/t0065-dir-iterator.sh  | 55 
 4 files changed, 86 insertions(+)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

diff --git a/Makefile b/Makefile
index 9b36068..41ce9ab 100644
--- a/Makefile
+++ b/Makefile
@@ -614,6 +614,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
+TEST_PROGRAMS_NEED_X += test-dir-iterator
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
 TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 758ed2e..a7d74d3 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -3,6 +3,7 @@
 /test-config
 /test-date
 /test-delta
+/test-dir-iterator
 /test-dump-cache-tree
 /test-dump-split-index
 /test-dump-untracked-cache
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644
index 000..0394a13
--- /dev/null
+++ b/t/helper/test-dir-iterator.c
@@ -0,0 +1,29 @@
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+int cmd_main(int argc, const char **argv) {
+   struct strbuf path = STRBUF_INIT;
+   struct dir_iterator *diter;
+
+   if (argc < 2)
+   die("BUG: test-dir-iterator needs one argument");
+
+   strbuf_add(, argv[1], strlen(argv[1]));
+
+   diter = dir_iterator_begin(path.buf);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode))
+   printf("[d] ");
+   else if (S_ISREG(diter->st.st_mode))
+   printf("[f] ");
+   else
+   printf("[?] ");
+
+   printf("(%s) [%s] %s\n", diter->relative_path, diter->basename, 
diter->path.buf);
+   }
+
+   return 0;
+}
diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
new file mode 100755
index 000..b6d76dd
--- /dev/null
+++ b/t/t0065-dir-iterator.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+test_description='Test directory iteration.'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+   mkdir -p dir &&
+   mkdir -p dir/a/b/c/ &&
+   >dir/b &&
+   >dir/c &&
+   mkdir -p dir/d/e/d/ &&
+   >dir/a/b/c/d &&
+   >dir/a/e &&
+   >dir/d/e/d/a &&
+
+   mkdir -p dir2/a/b/c/ &&
+   >dir2/a/b/c/d
+'
+
+cat >expect-sorted-output <<-\EOF &&
+[d] (a) [a] ./dir/a
+[d] (a/b) [b] ./dir/a/b
+[d] (a/b/c) [c] ./dir/a/b/c
+[d] (d) [d] ./dir/d
+[d] (d/e) [e] ./dir/d/e
+[d] (d/e/d) [d] ./dir/d/e/d
+[f] (a/b/c/d) [d] ./dir/a/b/c/d
+[f] (a/e) [e] ./dir/a/e
+[f] (b) [b] ./dir/b
+[f] (c) [c] ./dir/c
+[f] (d/e/d/a) [a] ./dir/d/e/d/a
+EOF
+
+test_expect_success 'dir-iterator should iterate through all files' '
+   test-dir-iterator ./dir >out &&
+   sort ./actual-pre-order-sorted-output &&
+
+   test_cmp expect-sorted-output actual-pre-order-sorted-output
+'
+
+cat >expect-pre-order-output <<-\EOF &&
+[d] (a) [a] ./dir2/a
+[d] (a/b) [b] ./dir2/a/b
+[d] (a/b/c) [c] ./dir2/a/b/c
+[f] (a/b/c/d) [d] ./dir2/a/b/c/d
+EOF
+
+test_expect_success 'dir-iterator should list files in the correct order' '
+   test-dir-iterator ./dir2 >actual-pre-order-output &&
+
+   test_cmp expect-pre-order-output actual-pre-order-output
+'
+
+test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v8 0/5] [GSoC] remove_subtree(): reimplement using iterators

2017-04-05 Thread Daniel Ferreira
This is the seventh version of a patch series that implements the GSoC
microproject of converting a recursive call to readdir() to use dir_iterator.

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
v3: 
https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t
v4: 
https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnm...@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a
v5: 
https://public-inbox.org/git/1490844730-47634-1-git-send-email-bnm...@gmail.com/T/#m2323f15e45de699f2e09364f40a62e17047cf453
v6: 
https://public-inbox.org/git/1491107726-21504-1-git-send-email-bnm...@gmail.com/T/#t
v7: 
https://public-inbox.org/git/1491163388-41255-1-git-send-email-bnm...@gmail.com/T/#t

Travis CI build: https://travis-ci.org/theiostream/git/jobs/21982

In this version, I applied pretty much all suggestions Michael and Stefan had
for the patch.

Michael, regarding the patch you sent for parsing the arguments on the
test-dir-iterator helper, I did not squash because it'd generate too many
merge conflicts. I just preferred add your code manually. Let me know how I
can properly credit you for it.

The only "controversial" part of this code is how I ended up caching the result
of lstat() on the dir_iterator_level struct. Having to handle the case of the
root directory ended up making set_iterator_data() way more complicated (having
to handle the case of level->stat not being set by push_dir_level()), as well
as required the introduction of st_is_set member in the level struct. This issue
would be solved if we could lstat() the root dir on dir_iterator_begin() and
possibly return an error there. Regardless, I appreciate other suggestions to
make this less complex.

Daniel Ferreira (5):
  dir_iterator: add tests for dir_iterator API
  remove_subtree(): test removing nested directories
  dir_iterator: add helpers to dir_iterator_advance
  dir_iterator: refactor state machine model
  remove_subtree(): reimplement using iterators

 Makefile|   1 +
 dir-iterator.c  | 247 +---
 dir-iterator.h  |  35 --
 entry.c |  39 +++
 refs/files-backend.c|   2 +-
 t/helper/.gitignore |   1 +
 t/helper/test-dir-iterator.c|  48 
 t/t0065-dir-iterator.sh |  93 +++
 t/t2000-checkout-cache-clash.sh |  11 ++
 9 files changed, 373 insertions(+), 104 deletions(-)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

--
2.7.4 (Apple Git-66)



Re: [PATCH v7 4/5] dir_iterator: refactor state machine model

2017-04-03 Thread Daniel Ferreira (theiostream)
On Mon, Apr 3, 2017 at 12:36 AM, Michael Haggerty  wrote:
> As far as I can tell, you got the logic in this complicated big loop
> correct on the first try (well, if we ignore v6 :-) ), even as you added
> new features. I think that's good evidence that the new structure is
> more comprehensible than the old, plus the new tests probably helped.
> That's a big win!

Thanks for ignoring v6 ;)

Another gain is that the other proposed features (only iterate over
directories, do not recurse into subdirectories) are also quite easy
to add with this new structure.

> It's not ideal that the test code depends on the numerical values of the
> flag constants, and it makes the tests harder to understand. It would be
> better if this program were to accept options like `--pre-order`,
> `--post-order`, etc., as I suggested in an earlier round of review.

Although it does make tests harder to understand, if we were to
specify how to iterate with human-readable flags we'd add the getopt()
+ flag configuration overhead to this helper program to be able to
handle all cases properly. Additionally, new flags added to
dir_iterator would have to edit the test program as well, generating
extra work.

I personally think that the string describing the test in the script
is enough to explain what the flag-as-argument is doing for the sake
of readability. The only gain I see in your suggestion is that we
avoid the programmer committing errors calculating the flag by hand
when writing the test.

That said, I'd appreciate some more thought on this.

> Michael
>

Thanks for the review. I agree with all other points and I'll address
them in a next series.


Re: [GSoC] Proposal: turn git-add--interactive.perl into a builtin

2017-04-02 Thread Daniel Ferreira (theiostream)
On Sun, Apr 2, 2017 at 4:43 PM, Johannes Schindelin
 wrote:
> We ask to accomplish a microproject before evaluating the proposals for
> one reason: to have a good understanding how well the students would
> interact with the project if they were accepted. As such, the
> microprojects really are about the flow of the contribution, not to tackle
> the project already.
> Meaning: I would recommend staying with your microproject, in particular
> if it is already in full swing.

Oh, when I mentioned these bugfixes I meant I'd be willing to do those
*plus* my microproject before the coding period begins as a "warm-up"
to the project. I'm certainly staying with the microproject until the
end!

-- Daniel.


[PATCH v7 5/5] remove_subtree(): reimplement using iterators

2017-04-02 Thread Daniel Ferreira
From: Daniel Ferreira <daniel.calib...@gmail.com>

Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 38 --
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/entry.c b/entry.c
index d2b512d..bd06f41 100644
--- a/entry.c
+++ b/entry.c
@@ -3,6 +3,8 @@
 #include "dir.h"
 #include "streaming.h"
 #include "submodule.h"
+#include "iterator.h"
+#include "dir-iterator.h"
 
 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -45,33 +47,17 @@ static void create_directories(const char *path, int 
path_len,
free(buf);
 }
 
-static void remove_subtree(struct strbuf *path)
+static void remove_subtree(const char *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
-   die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
+   struct dir_iterator *diter = dir_iterator_begin(path, 
DIR_ITERATOR_POST_ORDER_TRAVERSAL | DIR_ITERATOR_LIST_ROOT_DIR);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", diter->path.buf);
+   } else if (unlink(diter->path.buf))
+   die_errno("cannot unlink '%s'", diter->path.buf);
}
-   closedir(dir);
-   if (rmdir(path->buf))
-   die_errno("cannot rmdir '%s'", path->buf);
 }
 
 static int create_file(const char *path, unsigned int mode)
@@ -312,7 +298,7 @@ int checkout_entry(struct cache_entry *ce,
return 0;
if (!state->force)
return error("%s is a directory", path.buf);
-   remove_subtree();
+   remove_subtree(path.buf);
} else if (unlink(path.buf))
return error_errno("unable to unlink old '%s'", 
path.buf);
} else if (state->not_new)
-- 
2.7.4 (Apple Git-66)



[PATCH v7 4/5] dir_iterator: refactor state machine model

2017-04-02 Thread Daniel Ferreira
Perform major refactor of dir_iterator_advance(). dir_iterator has
ceased to rely on a convoluted state machine mechanism of two loops and
two state variables (level.initialized and level.dir_state). This serves
to ease comprehension of the iterator mechanism and ease addition of new
features to the iterator.

Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18). This is useful for
recursively removing a directory and calling rmdir() on a directory only
after all of its contents have been wiped.

Add an option for the dir_iterator API to iterate over the root
directory (the one it was initialized with) as well.

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned new features to be enabled. The new default behavior
(i.e. flags set to 0) does not iterate over directories. Flag
DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
iterates over the root directory. These flags do not conflict with each
other and may be used simultaneously.

Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
the flags parameter introduced.

Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to
test "post-order" and "iterate-over-root" modes.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c   | 155 +++
 dir-iterator.h   |  28 ++--
 refs/files-backend.c |   2 +-
 t/helper/test-dir-iterator.c |   6 +-
 t/t0065-dir-iterator.sh  |  61 -
 5 files changed, 183 insertions(+), 69 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index ce8bf81..18b7e68 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"
 
 struct dir_iterator_level {
-   int initialized;
-
DIR *dir;
 
/*
@@ -20,8 +18,11 @@ struct dir_iterator_level {
 * iteration and also iterated into):
 */
enum {
-   DIR_STATE_ITER,
-   DIR_STATE_RECURSE
+   DIR_STATE_PUSHED,
+   DIR_STATE_PRE_ITERATION,
+   DIR_STATE_ITERATING,
+   DIR_STATE_POST_ITERATION,
+   DIR_STATE_EXHAUSTED
} dir_state;
 };
 
@@ -48,15 +49,20 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };
 
 static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
 {
-   level->dir_state = DIR_STATE_RECURSE;
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
   iter->levels_alloc);
+
+   /* Push a new level */
level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   level->dir = NULL;
+   level->dir_state = DIR_STATE_PUSHED;
 }
 
 static inline int pop_dir_level(struct dir_iterator_int *iter)
@@ -75,18 +81,35 @@ static inline int set_iterator_data(struct dir_iterator_int 
*iter, struct dir_it
}
 
/*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
+* Check if we are dealing with the root directory as an
+* item that's being iterated through.
 */
-   iter->base.relative_path =
-   iter->base.path.buf + iter->levels[0].prefix_len;
+   if (level->dir_state != DIR_STATE_ITERATING &&
+   iter->levels_nr == 1) {
+   iter->base.relative_path = ".";
+   }
+   else {
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   }
iter->base.basename =
iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
 
return 0;
 }
 
+/*
+ * This function uses a state machine with the following states:
+ * -> DIR_STATE_PUSHED: the directory has been pushed to the
+ * iterator traversal tree.
+ * -> DIR_STATE_PRE_ITERATION: the directory is *NOT* initialized. The
+ * dirpath has already been returned if pre-order traversal is set.
+ * -> DIR_STATE_ITERATING: the directory is initialized. We are traversing
+ * through it.
+ * -> DIR_STATE_POST_ITERATION: the directory has been iterated through.
+ * We are ready to close it.
+ * -> DIR_STATE_EXHAUSTED: the directory is closed and ready to be popped.
+ */
 int dir_iterator_advance(s

[PATCH v7 3/5] dir_iterator: add helpers to dir_iterator_advance

2017-04-02 Thread Daniel Ferreira
Create inline helpers to dir_iterator_advance(). Make
dir_iterator_advance()'s code more legible and allow some behavior to
be reusable.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 65 +-
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..ce8bf81 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,43 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };
 
+static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static inline int pop_dir_level(struct dir_iterator_int *iter)
+{
+   return --iter->levels_nr;
+}
+
+static inline int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -84,11 +121,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * over; now prepare to iterate into
 * it.
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +137,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
 
continue;
@@ -129,7 +162,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, 
strerror(errno));
 
level->dir = NULL;
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
break;
}
@@ -138,23 +171,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
continue;
 
strbuf_addstr(>base.path, de->d_name);
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
-   if (errno != ENOENT)
-   warning("error reading path '%s': %s",
-   iter->base.path.buf,
-   strerror(errno));
-   continue;
-   }
 
-   /*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
-*/
-   iter->base.relative_path =
-   iter->base.path.buf + 
iter->levels[0].prefix_len;
-   iter->base.basename =
-   iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
+   if (set_iterator_data(iter, level))
+   continue;
 
return ITER_OK;
}
-- 
2.7.4 (Apple Git-66)



[PATCH v7 1/5] dir_iterator: add tests for dir_iterator API

2017-04-02 Thread Daniel Ferreira
Create t/helper/test-dir-iterator.c, which prints relevant information
about a directory tree iterated over with dir_iterator.

Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
iterate through a whole directory tree.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 Makefile |  1 +
 t/helper/.gitignore  |  1 +
 t/helper/test-dir-iterator.c | 28 +++
 t/t0065-dir-iterator.sh  | 54 
 4 files changed, 84 insertions(+)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

diff --git a/Makefile b/Makefile
index 9b36068..41ce9ab 100644
--- a/Makefile
+++ b/Makefile
@@ -614,6 +614,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
+TEST_PROGRAMS_NEED_X += test-dir-iterator
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
 TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index 758ed2e..a7d74d3 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -3,6 +3,7 @@
 /test-config
 /test-date
 /test-delta
+/test-dir-iterator
 /test-dump-cache-tree
 /test-dump-split-index
 /test-dump-untracked-cache
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644
index 000..06f03fc
--- /dev/null
+++ b/t/helper/test-dir-iterator.c
@@ -0,0 +1,28 @@
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+int cmd_main(int argc, const char **argv) {
+   struct strbuf path = STRBUF_INIT;
+   struct dir_iterator *diter;
+
+   if (argc < 2) {
+   return 1;
+   }
+
+   strbuf_add(, argv[1], strlen(argv[1]));
+
+   diter = dir_iterator_begin(path.buf);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode))
+   printf("[d] ");
+   else
+   printf("[f] ");
+
+   printf("(%s) %s\n", diter->relative_path, diter->path.buf);
+   }
+
+   return 0;
+}
diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
new file mode 100755
index 000..b857c07
--- /dev/null
+++ b/t/t0065-dir-iterator.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+
+test_description='Test directory iteration.'
+
+. ./test-lib.sh
+
+cat >expect-sorted-output <<-\EOF &&
+[d] (a) ./dir/a
+[d] (a/b) ./dir/a/b
+[d] (a/b/c) ./dir/a/b/c
+[d] (d) ./dir/d
+[d] (d/e) ./dir/d/e
+[d] (d/e/d) ./dir/d/e/d
+[f] (a/b/c/d) ./dir/a/b/c/d
+[f] (a/e) ./dir/a/e
+[f] (b) ./dir/b
+[f] (c) ./dir/c
+[f] (d/e/d/a) ./dir/d/e/d/a
+EOF
+
+test_expect_success 'dir-iterator should iterate through all files' '
+   mkdir -p dir &&
+   mkdir -p dir/a/b/c/ &&
+   >dir/b &&
+   >dir/c &&
+   mkdir -p dir/d/e/d/ &&
+   >dir/a/b/c/d &&
+   >dir/a/e &&
+   >dir/d/e/d/a &&
+
+   test-dir-iterator ./dir | sort >./actual-pre-order-sorted-output &&
+   rm -rf dir &&
+
+   test_cmp expect-sorted-output actual-pre-order-sorted-output
+'
+
+cat >expect-pre-order-output <<-\EOF &&
+[d] (a) ./dir/a
+[d] (a/b) ./dir/a/b
+[d] (a/b/c) ./dir/a/b/c
+[f] (a/b/c/d) ./dir/a/b/c/d
+EOF
+
+test_expect_success 'dir-iterator should list files in the correct order' '
+   mkdir -p dir/a/b/c/ &&
+   >dir/a/b/c/d &&
+
+   test-dir-iterator ./dir >actual-pre-order-output &&
+   rm -rf dir &&
+
+   test_cmp expect-pre-order-output actual-pre-order-output
+'
+
+test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v7 2/5] remove_subtree(): test removing nested directories

2017-04-02 Thread Daniel Ferreira
Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with 
--prefix' '
git checkout-index -a -f --prefix=there/
 '
 
+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+   echo content >path &&
+   git update-index --add path &&
+   rm path &&
+   mkdir -p path/with/nested/paths &&
+   echo content >path/file1 &&
+   echo content >path/with/nested/paths/file2 &&
+   git checkout-index -f -a &&
+   test ! -d path
+'
+
 test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v7 0/5] [GSoC] remove_subtree(): reimplement using iterators

2017-04-02 Thread Daniel Ferreira
This is the seventh version of a patch series that implements the GSoC
microproject of converting a recursive call to readdir() to use dir_iterator.

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
v3: 
https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t
v4: 
https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnm...@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a
v5: 
https://public-inbox.org/git/1490844730-47634-1-git-send-email-bnm...@gmail.com/T/#m2323f15e45de699f2e09364f40a62e17047cf453
v6: 
https://public-inbox.org/git/1491107726-21504-1-git-send-email-bnm...@gmail.com/T/#t

I screwed up in v6 because I had introduced a bug that in case git tried to open
a directory that did not exist using dir_iterator, the program would segfault. 
This
was amended and all commits are passing the tests. Sorry for not having tested
my changes properly.

CI build: https://travis-ci.org/theiostream/git

Since the changes in v6 were not reviewed, I'll just copy what was sent
back there.

> Back in v5, Michael had a number of suggestions, all of which were applied
> to this version (including a slightly modified version of his "biggish 
> rewrite"
> project to make dir_iterator's state machine simpler). The only suggestion 
> that
> did not make it into this series was that of not traversing into 
> subdirectories,
> since I believe it would be better off in another series that actually 
> required
> that feature (that is, I do not want a series to implement a feature it will
> not need). The same goes for Junio's thought on a flag to list *only* 
> directories
> and no files on the v4 discussion.

> Junio and Peff's comments about how to write to files in the tests were also
> considered, and the tests were adjusted.

> I chose to squash both the state machine refactor and the addition of the
> new flags in a single commit. I do not know whether you will feel this is
> the right choice but it seemed natural, since most of the state machine's
> new logic would not even make sense without encompassing the new features.
> I am, of course, open for feedback on this decision.

Daniel Ferreira (5):
  dir_iterator: add tests for dir_iterator API
  remove_subtree(): test removing nested directories
  dir_iterator: add helpers to dir_iterator_advance
  dir_iterator: refactor state machine model
  remove_subtree(): reimplement using iterators

 Makefile|   1 +
 dir-iterator.c  | 196 ++--
 dir-iterator.h  |  28 --
 entry.c |  38 +++-
 refs/files-backend.c|   2 +-
 t/helper/.gitignore |   1 +
 t/helper/test-dir-iterator.c|  32 +++
 t/t0065-dir-iterator.sh | 109 ++
 t/t2000-checkout-cache-clash.sh |  11 +++
 9 files changed, 316 insertions(+), 102 deletions(-)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

--
2.7.4 (Apple Git-66)



Re: [PATCH v6 4/5] dir_iterator: refactor state machine model

2017-04-01 Thread Daniel Ferreira (theiostream)
Gah, I just realized I failed to correct refs/files-backend.c's
behavior and kept 0 instead of DIR_ITERATOR_PRE_ORDER_TRAVERSAL as its
flag. I'll correct this on a v7, but I'll wait for the rest of your
reviews before sending that revision.

On Sun, Apr 2, 2017 at 1:35 AM, Daniel Ferreira <bnm...@gmail.com> wrote:
> Perform major refactor of dir_iterator_advance(). dir_iterator has
> ceased to rely on a convoluted state machine mechanism of two loops and
> two state variables (level.initialized and level.dir_state). This serves
> to ease comprehension of the iterator mechanism and ease addition of new
> features to the iterator.
>
> Create an option for the dir_iterator API to iterate over subdirectories
> only after having iterated through their contents. This feature was
> predicted, although not implemented by 0fe5043 ("dir_iterator: new API
> for iterating over a directory tree", 2016-06-18). This is useful for
> recursively removing a directory and calling rmdir() on a directory only
> after all of its contents have been wiped.
>
> Add an option for the dir_iterator API to iterate over the root
> directory (the one it was initialized with) as well.
>
> Add the "flags" parameter to dir_iterator_create, allowing for the
> aforementioned new features to be enabled. The new default behavior
> (i.e. flags set to 0) does not iterate over directories. Flag
> DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
> so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
> directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
> iterates over the root directory. These flags do not conflict with each
> other and may be used simultaneously.
>
> Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
> the flags parameter introduced.
>
> Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to
> test "post-order" and "iterate-over-root" modes.
>
> Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
> ---
>  dir-iterator.c   | 149 
> +++
>  dir-iterator.h   |  28 ++--
>  refs/files-backend.c |   2 +-
>  t/helper/test-dir-iterator.c |   6 +-
>  t/t0065-dir-iterator.sh  |  61 +-
>  5 files changed, 179 insertions(+), 67 deletions(-)
>
> diff --git a/dir-iterator.c b/dir-iterator.c
> index ce8bf81..4b23889 100644
> --- a/dir-iterator.c
> +++ b/dir-iterator.c
> @@ -4,8 +4,6 @@
>  #include "dir-iterator.h"
>
>  struct dir_iterator_level {
> -   int initialized;
> -
> DIR *dir;
>
> /*
> @@ -20,8 +18,11 @@ struct dir_iterator_level {
>  * iteration and also iterated into):
>  */
> enum {
> -   DIR_STATE_ITER,
> -   DIR_STATE_RECURSE
> +   DIR_STATE_PUSHED,
> +   DIR_STATE_PRE_ITERATION,
> +   DIR_STATE_ITERATING,
> +   DIR_STATE_POST_ITERATION,
> +   DIR_STATE_EXHAUSTED
> } dir_state;
>  };
>
> @@ -48,15 +49,20 @@ struct dir_iterator_int {
>  * that will be included in this iteration.
>  */
> struct dir_iterator_level *levels;
> +
> +   /* Holds the flags passed to dir_iterator_begin(). */
> +   unsigned flags;
>  };
>
>  static inline void push_dir_level(struct dir_iterator_int *iter, struct 
> dir_iterator_level *level)
>  {
> -   level->dir_state = DIR_STATE_RECURSE;
> ALLOC_GROW(iter->levels, iter->levels_nr + 1,
>iter->levels_alloc);
> +
> +   /* Push a new level */
> level = >levels[iter->levels_nr++];
> -   level->initialized = 0;
> +   level->dir = NULL;
> +   level->dir_state = DIR_STATE_PUSHED;
>  }
>
>  static inline int pop_dir_level(struct dir_iterator_int *iter)
> @@ -75,18 +81,35 @@ static inline int set_iterator_data(struct 
> dir_iterator_int *iter, struct dir_it
> }
>
> /*
> -* We have to set these each time because
> -* the path strbuf might have been realloc()ed.
> +* Check if we are dealing with the root directory as an
> +* item that's being iterated through.
>  */
> -   iter->base.relative_path =
> -   iter->base.path.buf + iter->levels[0].prefix_len;
> +   if (level->dir_state != DIR_STATE_ITERATING &&
> +   iter->levels_nr == 1) {
> +   iter->base.relative_path = ".";
> +   }
> +   else {
> +   iter->base.relative_path =
> + 

[PATCH v6 5/5] remove_subtree(): reimplement using iterators

2017-04-01 Thread Daniel Ferreira
Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 38 --
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/entry.c b/entry.c
index c6eea24..309b9ad 100644
--- a/entry.c
+++ b/entry.c
@@ -2,6 +2,8 @@
 #include "blob.h"
 #include "dir.h"
 #include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"

 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -44,33 +46,17 @@ static void create_directories(const char *path, int 
path_len,
free(buf);
 }

-static void remove_subtree(struct strbuf *path)
+static void remove_subtree(const char *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
-   die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
+   struct dir_iterator *diter = dir_iterator_begin(path, 
DIR_ITERATOR_POST_ORDER_TRAVERSAL | DIR_ITERATOR_LIST_ROOT_DIR);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", diter->path.buf);
+   } else if (unlink(diter->path.buf))
+   die_errno("cannot unlink '%s'", diter->path.buf);
}
-   closedir(dir);
-   if (rmdir(path->buf))
-   die_errno("cannot rmdir '%s'", path->buf);
 }

 static int create_file(const char *path, unsigned int mode)
@@ -282,7 +268,7 @@ int checkout_entry(struct cache_entry *ce,
return 0;
if (!state->force)
return error("%s is a directory", path.buf);
-   remove_subtree();
+   remove_subtree(path.buf);
} else if (unlink(path.buf))
return error_errno("unable to unlink old '%s'", 
path.buf);
} else if (state->not_new)
--
2.7.4 (Apple Git-66)



[PATCH v6 3/5] dir_iterator: add helpers to dir_iterator_advance

2017-04-01 Thread Daniel Ferreira
Create inline helpers to dir_iterator_advance(). Make
dir_iterator_advance()'s code more legible and allow some behavior to
be reusable.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 65 +-
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..ce8bf81 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,43 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };

+static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static inline int pop_dir_level(struct dir_iterator_int *iter)
+{
+   return --iter->levels_nr;
+}
+
+static inline int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -84,11 +121,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * over; now prepare to iterate into
 * it.
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +137,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);

continue;
@@ -129,7 +162,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, 
strerror(errno));

level->dir = NULL;
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
break;
}
@@ -138,23 +171,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
continue;

strbuf_addstr(>base.path, de->d_name);
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
-   if (errno != ENOENT)
-   warning("error reading path '%s': %s",
-   iter->base.path.buf,
-   strerror(errno));
-   continue;
-   }

-   /*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
-*/
-   iter->base.relative_path =
-   iter->base.path.buf + 
iter->levels[0].prefix_len;
-   iter->base.basename =
-   iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
+   if (set_iterator_data(iter, level))
+   continue;

return ITER_OK;
}
--
2.7.4 (Apple Git-66)



[PATCH v6 4/5] dir_iterator: refactor state machine model

2017-04-01 Thread Daniel Ferreira
Perform major refactor of dir_iterator_advance(). dir_iterator has
ceased to rely on a convoluted state machine mechanism of two loops and
two state variables (level.initialized and level.dir_state). This serves
to ease comprehension of the iterator mechanism and ease addition of new
features to the iterator.

Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18). This is useful for
recursively removing a directory and calling rmdir() on a directory only
after all of its contents have been wiped.

Add an option for the dir_iterator API to iterate over the root
directory (the one it was initialized with) as well.

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned new features to be enabled. The new default behavior
(i.e. flags set to 0) does not iterate over directories. Flag
DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
iterates over the root directory. These flags do not conflict with each
other and may be used simultaneously.

Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
the flags parameter introduced.

Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to
test "post-order" and "iterate-over-root" modes.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c   | 149 +++
 dir-iterator.h   |  28 ++--
 refs/files-backend.c |   2 +-
 t/helper/test-dir-iterator.c |   6 +-
 t/t0065-dir-iterator.sh  |  61 +-
 5 files changed, 179 insertions(+), 67 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index ce8bf81..4b23889 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"
 
 struct dir_iterator_level {
-   int initialized;
-
DIR *dir;
 
/*
@@ -20,8 +18,11 @@ struct dir_iterator_level {
 * iteration and also iterated into):
 */
enum {
-   DIR_STATE_ITER,
-   DIR_STATE_RECURSE
+   DIR_STATE_PUSHED,
+   DIR_STATE_PRE_ITERATION,
+   DIR_STATE_ITERATING,
+   DIR_STATE_POST_ITERATION,
+   DIR_STATE_EXHAUSTED
} dir_state;
 };
 
@@ -48,15 +49,20 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };
 
 static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
 {
-   level->dir_state = DIR_STATE_RECURSE;
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
   iter->levels_alloc);
+
+   /* Push a new level */
level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   level->dir = NULL;
+   level->dir_state = DIR_STATE_PUSHED;
 }
 
 static inline int pop_dir_level(struct dir_iterator_int *iter)
@@ -75,18 +81,35 @@ static inline int set_iterator_data(struct dir_iterator_int 
*iter, struct dir_it
}
 
/*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
+* Check if we are dealing with the root directory as an
+* item that's being iterated through.
 */
-   iter->base.relative_path =
-   iter->base.path.buf + iter->levels[0].prefix_len;
+   if (level->dir_state != DIR_STATE_ITERATING &&
+   iter->levels_nr == 1) {
+   iter->base.relative_path = ".";
+   }
+   else {
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   }
iter->base.basename =
iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
 
return 0;
 }
 
+/*
+ * This function uses a state machine with the following states:
+ * -> DIR_STATE_PUSHED: the directory has been pushed to the
+ * iterator traversal tree.
+ * -> DIR_STATE_PRE_ITERATION: the directory is *NOT* initialized. The
+ * dirpath has already been returned if pre-order traversal is set.
+ * -> DIR_STATE_ITERATING: the directory is initialized. We are traversing
+ * through it.
+ * -> DIR_STATE_POST_ITERATION: the directory has been iterated through.
+ * We are ready to close it.
+ * -> DIR_STATE_EXHAUSTED: the directory is closed and ready to be popped.
+ */
 int dir_iterator_advance(s

[PATCH v6 2/5] remove_subtree(): test removing nested directories

2017-04-01 Thread Daniel Ferreira
Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with 
--prefix' '
git checkout-index -a -f --prefix=there/
 '

+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+   echo content >path &&
+   git update-index --add path &&
+   rm path &&
+   mkdir -p path/with/nested/paths &&
+   echo content >path/file1 &&
+   echo content >path/with/nested/paths/file2 &&
+   git checkout-index -f -a &&
+   test ! -d path
+'
+
 test_done
--
2.7.4 (Apple Git-66)



[PATCH v6 1/5] dir_iterator: add tests for dir_iterator API

2017-04-01 Thread Daniel Ferreira
Create t/helper/test-dir-iterator.c, which prints relevant information
about a directory tree iterated over with dir_iterator.

Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
iterate through a whole directory tree.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 Makefile |  1 +
 t/helper/.gitignore  |  1 +
 t/helper/test-dir-iterator.c | 28 +++
 t/t0065-dir-iterator.sh  | 54 
 4 files changed, 84 insertions(+)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

diff --git a/Makefile b/Makefile
index a5a11e7..d0245f3 100644
--- a/Makefile
+++ b/Makefile
@@ -607,6 +607,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
+TEST_PROGRAMS_NEED_X += test-dir-iterator
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
 TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
diff --git a/t/helper/.gitignore b/t/helper/.gitignore
index d6e8b36..9c9656d 100644
--- a/t/helper/.gitignore
+++ b/t/helper/.gitignore
@@ -3,6 +3,7 @@
 /test-config
 /test-date
 /test-delta
+/test-dir-iterator
 /test-dump-cache-tree
 /test-dump-split-index
 /test-dump-untracked-cache
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644
index 000..06f03fc
--- /dev/null
+++ b/t/helper/test-dir-iterator.c
@@ -0,0 +1,28 @@
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+int cmd_main(int argc, const char **argv) {
+   struct strbuf path = STRBUF_INIT;
+   struct dir_iterator *diter;
+
+   if (argc < 2) {
+   return 1;
+   }
+
+   strbuf_add(, argv[1], strlen(argv[1]));
+
+   diter = dir_iterator_begin(path.buf);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode))
+   printf("[d] ");
+   else
+   printf("[f] ");
+
+   printf("(%s) %s\n", diter->relative_path, diter->path.buf);
+   }
+
+   return 0;
+}
diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
new file mode 100755
index 000..b857c07
--- /dev/null
+++ b/t/t0065-dir-iterator.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+
+test_description='Test directory iteration.'
+
+. ./test-lib.sh
+
+cat >expect-sorted-output <<-\EOF &&
+[d] (a) ./dir/a
+[d] (a/b) ./dir/a/b
+[d] (a/b/c) ./dir/a/b/c
+[d] (d) ./dir/d
+[d] (d/e) ./dir/d/e
+[d] (d/e/d) ./dir/d/e/d
+[f] (a/b/c/d) ./dir/a/b/c/d
+[f] (a/e) ./dir/a/e
+[f] (b) ./dir/b
+[f] (c) ./dir/c
+[f] (d/e/d/a) ./dir/d/e/d/a
+EOF
+
+test_expect_success 'dir-iterator should iterate through all files' '
+   mkdir -p dir &&
+   mkdir -p dir/a/b/c/ &&
+   >dir/b &&
+   >dir/c &&
+   mkdir -p dir/d/e/d/ &&
+   >dir/a/b/c/d &&
+   >dir/a/e &&
+   >dir/d/e/d/a &&
+
+   test-dir-iterator ./dir | sort >./actual-pre-order-sorted-output &&
+   rm -rf dir &&
+
+   test_cmp expect-sorted-output actual-pre-order-sorted-output
+'
+
+cat >expect-pre-order-output <<-\EOF &&
+[d] (a) ./dir/a
+[d] (a/b) ./dir/a/b
+[d] (a/b/c) ./dir/a/b/c
+[f] (a/b/c/d) ./dir/a/b/c/d
+EOF
+
+test_expect_success 'dir-iterator should list files in the correct order' '
+   mkdir -p dir/a/b/c/ &&
+   >dir/a/b/c/d &&
+
+   test-dir-iterator ./dir >actual-pre-order-output &&
+   rm -rf dir &&
+
+   test_cmp expect-pre-order-output actual-pre-order-output
+'
+
+test_done
--
2.7.4 (Apple Git-66)



[PATCH v6 0/5] [GSoC] remove_subtree(): reimplement using iterators

2017-04-01 Thread Daniel Ferreira
This is the sixth version of a patch series that implements the GSoC
microproject of converting a recursive call to readdir() to use dir_iterator.

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
v3: 
https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t
v4: 
https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnm...@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a
v5: 
https://public-inbox.org/git/1490844730-47634-1-git-send-email-bnm...@gmail.com/T/#m2323f15e45de699f2e09364f40a62e17047cf453

Back in v5, Michael had a number of suggestions, all of which were applied
to this version (including a slightly modified version of his "biggish rewrite"
project to make dir_iterator's state machine simpler). The only suggestion that
did not make it into this series was that of not traversing into subdirectories,
since I believe it would be better off in another series that actually required
that feature (that is, I do not want a series to implement a feature it will
not need). The same goes for Junio's thought on a flag to list *only* 
directories
and no files on the v4 discussion.

Junio and Peff's comments about how to write to files in the tests were also
considered, and the tests were adjusted.

I chose to squash both the state machine refactor and the addition of the
new flags in a single commit. I do not know whether you will feel this is
the right choice but it seemed natural, since most of the state machine's
new logic would not even make sense without encompassing the new features.
I am, of course, open for feedback on this decision.

To Michael and Duy, thanks -- really -- for the encouraging comments! :)
I never regarded this microproject purely as a means to fulfill a GSoC
requirement, but as a way to get to learn more about Git, so I'm surely
not giving it up.

Once again, thanks for all the previous reviews,
Daniel.

Daniel Ferreira (5):
  dir_iterator: add tests for dir_iterator API
  remove_subtree(): test removing nested directories
  dir_iterator: add helpers to dir_iterator_advance
  dir_iterator: refactor state machine model
  remove_subtree(): reimplement using iterators

 Makefile|   1 +
 dir-iterator.c  | 190 ++--
 dir-iterator.h  |  28 --
 entry.c |  38 +++-
 refs/files-backend.c|   2 +-
 t/helper/.gitignore |   1 +
 t/helper/test-dir-iterator.c|  32 +++
 t/t0065-dir-iterator.sh | 109 +++
 t/t2000-checkout-cache-clash.sh |  11 +++
 9 files changed, 312 insertions(+), 100 deletions(-)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

--
2.7.4 (Apple Git-66)



Re: [PATCH v4 2/5] dir_iterator: iterate over dir after its contents

2017-04-01 Thread Daniel Ferreira (theiostream)
Why exactly would it not be applicable to read_directory_recursively()?

On Thu, Mar 30, 2017 at 8:08 AM, Duy Nguyen  wrote:
> On Thu, Mar 30, 2017 at 1:39 PM, Michael Haggerty  
> wrote:
>> * DIR_ITERATOR_RECURSE -- recurse into subdirectories
>>
>> would make the set of possible options complete. If this option is not
>> set, then the iteration would be over the entries in a single directory
>> without traversing its subdirectories.
>
> And would make it possible to use dir-iterator everywhere (except
> read_directory_recursiveky). That would be really nice :)
> --
> Duy


Re: [GSoC] Proposal: turn git-add--interactive.perl into a builtin

2017-03-31 Thread Daniel Ferreira (theiostream)
Well, Google requires me to have a draft on a Google Doc anyway for
the proposal, and I am unsure who exactly it will reach. Since it *is*
part of the discussion regarding my proposal, I suppose it is worth
posting here for anyone to comment:
https://docs.google.com/document/d/1dvF2PNRQvvZ351jCdKzOLs7tzaDqhR7ci7TDgzYQg9I/edit?usp=sharing.

-- Daniel.

On Fri, Mar 31, 2017 at 2:07 AM, Daniel Ferreira (theiostream)
<bnm...@gmail.com> wrote:
> Hi Stefan & Johannes,
>
> Thank you for the precious feedback on the proposal. I don't see much
> sense in sending a full "v2" of it and have you read it all over
> again, so I'll just answer to your comments directly.
>
> Also, although the GSoC website allows me to send a "proposal draft"
> to you through the website, since I've already sent it here that
> shouldn't be necessary, correct? I intend to use it just to send the
> final thing.
>
> On Wed, Mar 29, 2017 at 9:01 PM, Johannes Schindelin
> <johannes.schinde...@gmx.de> wrote:
>> On Tue, 28 Mar 2017, Stefan Beller wrote:
>>
>>> On Sat, Mar 25, 2017 at 8:15 PM, Daniel Ferreira (theiostream)
>>> <bnm...@gmail.com> wrote:
>>>
>>> > SYNOPSIS
>>> > There are many advantages to converting parts of git that are still
>>> > scripts to C builtins, among which execution speed, improved
>>> > compatibility and code deduplication.
>>>
>>> agreed.
>>
>> I would even add portability. But yeah, speed is a big thing. I am an
>> extensive user of `git add -p` (which is backed by
>> git-add--interactive.perl) and it is slow as molasses on Windows, just
>> because it is a Perl script (and the Perl interpreter needs to emulate
>> POSIX functionality that is frequently not even needed, such as: copying
>> all memory and reopening all file descriptors in a fork() call only to
>> exec() git.exe right away, tossing all of the diligently work into the
>> dustbin).
>
> Thanks for this example – it hadn't come to my mind since I don't use
> Git on Windows. I'll be sure to complement the synopsis with it. :)
>
>>
>>> > FEASIBILITY
>>> >
>>> > There was only one discussion regarding the feasibility of its porting
>>> > (https://public-inbox.org/git/CAP8UFD2PcBsU6=FK4OHVrB7E98ycohS_0pYcbCBar=of1hl...@mail.gmail.com/).
>>> > It resulted in a consensus that doing it would be a task too large –
>>> > although interesting – for GSoC 2015 based on the amount of its lines
>>> > of code. It is, however, only a few lines larger than
>>> > git-rebase--interactive, which has been considered an appropriate
>>> > idea. As such, it looks like a possible project for three months of
>>> > full-time work.
>>>
>>> ok, it sounds a challenging project. (currently counting 1750 lines of
>>> code). Scrolling over the source code, there are quite a couple of
>>> functions, where the direct equivalent in C springs to mind.
>>>
>>> run_cmd_pipe -> see run-command.h
>>> unquote_path -> unquote_c_style ?
>>> refresh -> update_index_if_able()
>>> list_modified -> iterate over "const struct cache_entry *ce = 
>>> active_cache[i];"
>
> Thank you for these functions. I don't think I will be able to specify
> them in detail as part of the projected timeline (e.g. "June 1:
> convert calls to refresh() to use update_index_if_able()") already
> because there is not enough time prior to the proposal deadline to
> study their behavior in detail, and I like to avoid talking about
> things I don't fully understand. Although I think I can cite them as
> examples for a thesis I had put elsewhere in the proposal that "Git
> APIs in Perl already have functional equivalents in C".
>
> Also, they will be great for my early investigation stage into
> git-add--interactive. :) Once more, thanks for having listed them.
>
>> Yes, I think it would be more important to acquaint oneself with the
>> idiosynchracies of Git's internal "API" than to get familiar with Perl:
>> interpreting what obscure Perl code does is something I would gladly do as
>> a mentor.
>
> That's really nice! I usually don't get stuck when trying to
> understand code in languages I'm not too well acquainted with, but I
> figured getting more familiar with Perl would speed development up.
> But it does make sense that this "prior to May 4" might be better
> invested learning about git's internals than Perl.
>
> Question: do you suggest any pending bugfix to git-add--interactive or
> to something related t

Re: [GSoC] Proposal: turn git-add--interactive.perl into a builtin

2017-03-30 Thread Daniel Ferreira (theiostream)
Hi Stefan & Johannes,

Thank you for the precious feedback on the proposal. I don't see much
sense in sending a full "v2" of it and have you read it all over
again, so I'll just answer to your comments directly.

Also, although the GSoC website allows me to send a "proposal draft"
to you through the website, since I've already sent it here that
shouldn't be necessary, correct? I intend to use it just to send the
final thing.

On Wed, Mar 29, 2017 at 9:01 PM, Johannes Schindelin
<johannes.schinde...@gmx.de> wrote:
> On Tue, 28 Mar 2017, Stefan Beller wrote:
>
>> On Sat, Mar 25, 2017 at 8:15 PM, Daniel Ferreira (theiostream)
>> <bnm...@gmail.com> wrote:
>>
>> > SYNOPSIS
>> > There are many advantages to converting parts of git that are still
>> > scripts to C builtins, among which execution speed, improved
>> > compatibility and code deduplication.
>>
>> agreed.
>
> I would even add portability. But yeah, speed is a big thing. I am an
> extensive user of `git add -p` (which is backed by
> git-add--interactive.perl) and it is slow as molasses on Windows, just
> because it is a Perl script (and the Perl interpreter needs to emulate
> POSIX functionality that is frequently not even needed, such as: copying
> all memory and reopening all file descriptors in a fork() call only to
> exec() git.exe right away, tossing all of the diligently work into the
> dustbin).

Thanks for this example – it hadn't come to my mind since I don't use
Git on Windows. I'll be sure to complement the synopsis with it. :)

>
>> > FEASIBILITY
>> >
>> > There was only one discussion regarding the feasibility of its porting
>> > (https://public-inbox.org/git/CAP8UFD2PcBsU6=FK4OHVrB7E98ycohS_0pYcbCBar=of1hl...@mail.gmail.com/).
>> > It resulted in a consensus that doing it would be a task too large –
>> > although interesting – for GSoC 2015 based on the amount of its lines
>> > of code. It is, however, only a few lines larger than
>> > git-rebase--interactive, which has been considered an appropriate
>> > idea. As such, it looks like a possible project for three months of
>> > full-time work.
>>
>> ok, it sounds a challenging project. (currently counting 1750 lines of
>> code). Scrolling over the source code, there are quite a couple of
>> functions, where the direct equivalent in C springs to mind.
>>
>> run_cmd_pipe -> see run-command.h
>> unquote_path -> unquote_c_style ?
>> refresh -> update_index_if_able()
>> list_modified -> iterate over "const struct cache_entry *ce = 
>> active_cache[i];"

Thank you for these functions. I don't think I will be able to specify
them in detail as part of the projected timeline (e.g. "June 1:
convert calls to refresh() to use update_index_if_able()") already
because there is not enough time prior to the proposal deadline to
study their behavior in detail, and I like to avoid talking about
things I don't fully understand. Although I think I can cite them as
examples for a thesis I had put elsewhere in the proposal that "Git
APIs in Perl already have functional equivalents in C".

Also, they will be great for my early investigation stage into
git-add--interactive. :) Once more, thanks for having listed them.

> Yes, I think it would be more important to acquaint oneself with the
> idiosynchracies of Git's internal "API" than to get familiar with Perl:
> interpreting what obscure Perl code does is something I would gladly do as
> a mentor.

That's really nice! I usually don't get stuck when trying to
understand code in languages I'm not too well acquainted with, but I
figured getting more familiar with Perl would speed development up.
But it does make sense that this "prior to May 4" might be better
invested learning about git's internals than Perl.

Question: do you suggest any pending bugfix to git-add--interactive or
to something related that might give some useful knowledge in advance?
(for the pre-code period). My microproject involves playing with the
dir_iterator interface, which is a great exercise in code refactoring
but really does not teach me too much about Git's architecture.

Even if you do not have an answer to this, I'm pretty sure I'll keep
this commitment to submitting some patch series somehow related to
git-add before GSoC begins, especially after this comment from
Johannes.

>
>> > PROJECTED TIMELINE
>> > - Prior to May 4
>> > -- Refine my basic knowledge of Perl
>> > -- Craft one or two small patches to some of Git's Perl components
>> > (preferentially to git-add--interactive itself) to improve my
>> > understanding of the language and of how Git's Perl scripts act

Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API

2017-03-30 Thread Daniel Ferreira (theiostream)
On Thu, Mar 30, 2017 at 5:05 AM, Michael Haggerty  wrote:
> Oh I forgot to mention, in the Git project we don't allow declarations
> to be mixed with code. Apparently there's some ancient compiler
> somewhere that doesn't allow it. Declarations always have to be
> together, at the top of a block. (Compile with
> `-Werror=declaration-after-statement` to detect this.)

Sorry about that. I'm compiling git on clang and it has a bug that
ignores this warning (it shows up on gcc). I'll watch out for it.


Re: [PATCH v5 4/6] dir_iterator: add tests for dir_iterator API

2017-03-30 Thread Daniel Ferreira (theiostream)
On Thu, Mar 30, 2017 at 4:46 AM, Michael Haggerty  wrote:
> Is there a special reason to write the date to the file as opposed to, say
>
> touch dir/b
>
> ? (Some people use `: >dir/b` for this purpose, though I've never found
> out why.) If you write the date to the file, the reader will be
> distracted unnecessarily wondering whether the contents are important to
> the test.
>

There's no reason. They will be `touch`ed instead of written in a next version.

`:` is a bash builtin that that returns an exit code of zero and
produces no output. On my Mac at least:

bash-3.2$ type :
: is a shell builtin
bash-3.2$ type touch
touch is /usr/bin/touch

I suppose there are reasons to try to keep the most of a shell
script's logic within the shell itself, without involving external
binaries.


[PATCH v5 5/6] remove_subtree(): reimplement using iterators

2017-03-29 Thread Daniel Ferreira
From: Daniel Ferreira <daniel.calib...@gmail.com>

Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 41 +++--
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/entry.c b/entry.c
index c6eea24..30197b2 100644
--- a/entry.c
+++ b/entry.c
@@ -2,6 +2,8 @@
 #include "blob.h"
 #include "dir.h"
 #include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"

 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -44,33 +46,20 @@ static void create_directories(const char *path, int 
path_len,
free(buf);
 }

-static void remove_subtree(struct strbuf *path)
+static void remove_subtree(const char *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
-   die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
+   struct dir_iterator *diter = dir_iterator_begin(path, 
DIR_ITERATOR_POST_ORDER_TRAVERSAL);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", diter->path.buf);
+   } else if (unlink(diter->path.buf))
+   die_errno("cannot unlink '%s'", diter->path.buf);
}
-   closedir(dir);
-   if (rmdir(path->buf))
-   die_errno("cannot rmdir '%s'", path->buf);
+
+   if (rmdir(path))
+   die_errno("cannot rmdir '%s'", path);
 }

 static int create_file(const char *path, unsigned int mode)
@@ -282,7 +271,7 @@ int checkout_entry(struct cache_entry *ce,
return 0;
if (!state->force)
return error("%s is a directory", path.buf);
-   remove_subtree();
+   remove_subtree(path.buf);
} else if (unlink(path.buf))
return error_errno("unable to unlink old '%s'", 
path.buf);
} else if (state->not_new)
--
2.7.4 (Apple Git-66)



[PATCH v5 6/6] remove_subtree(): test removing nested directories

2017-03-29 Thread Daniel Ferreira
Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with 
--prefix' '
git checkout-index -a -f --prefix=there/
 '
 
+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+   echo content >path &&
+   git update-index --add path &&
+   rm path &&
+   mkdir -p path/with/nested/paths &&
+   echo content >path/file1 &&
+   echo content >path/with/nested/paths/file2 &&
+   git checkout-index -f -a &&
+   test ! -d path
+'
+
 test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v5 4/6] dir_iterator: add tests for dir_iterator API

2017-03-29 Thread Daniel Ferreira
Create t/helper/test-dir-iterator.c, which prints relevant information
about a directory tree iterated over with dir_iterator.

Create t/t0065-dir-iterator.sh, which tests that dir_iterator does
iterate through a whole directory tree and that post-order directory
iteration is correctly implemented.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 Makefile |  1 +
 t/helper/test-dir-iterator.c | 32 +++
 t/t0065-dir-iterator.sh  | 45 
 3 files changed, 78 insertions(+)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

diff --git a/Makefile b/Makefile
index a5a11e7..d0245f3 100644
--- a/Makefile
+++ b/Makefile
@@ -607,6 +607,7 @@ TEST_PROGRAMS_NEED_X += test-ctype
 TEST_PROGRAMS_NEED_X += test-config
 TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
+TEST_PROGRAMS_NEED_X += test-dir-iterator
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
 TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644
index 000..b4a148f
--- /dev/null
+++ b/t/helper/test-dir-iterator.c
@@ -0,0 +1,32 @@
+#include "cache.h"
+#include "blob.h"
+#include "dir.h"
+#include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+int cmd_main(int argc, const char **argv) {
+   if (argc < 2) {
+   return 1;
+   }
+
+   struct strbuf path = STRBUF_INIT;
+   strbuf_add(, argv[1], strlen(argv[1]));
+
+   unsigned flag = 0;
+   if (argc == 3 && strcmp(argv[2], "--post-order") == 0)
+   flag = DIR_ITERATOR_POST_ORDER_TRAVERSAL;
+
+   struct dir_iterator *diter = dir_iterator_begin(()->buf, flag);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode))
+   printf("[d] ");
+   else
+   printf("[f] ");
+
+   printf("(%s) %s\n", diter->relative_path, diter->path.buf);
+   }
+
+   return 0;
+}
diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
new file mode 100755
index 000..3c8ea9a
--- /dev/null
+++ b/t/t0065-dir-iterator.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+test_description='Test directory iteration.'
+
+. ./test-lib.sh
+
+ITER_SORTED_OUTPUT='[d] (a) ./dir/a
+[d] (a/b) ./dir/a/b
+[d] (a/b/c) ./dir/a/b/c
+[d] (d) ./dir/d
+[d] (d/e) ./dir/d/e
+[d] (d/e/d) ./dir/d/e/d
+[f] (a/b/c/d) ./dir/a/b/c/d
+[f] (a/e) ./dir/a/e
+[f] (b) ./dir/b
+[f] (c) ./dir/c
+[f] (d/e/d/a) ./dir/d/e/d/a'
+
+test_expect_success 'dir-iterator should iterate through all files' '
+   mkdir -p dir &&
+   mkdir -p dir/a/b/c/ &&
+   date >dir/b &&
+   date >dir/c &&
+   mkdir -p dir/d/e/d/ &&
+   date >dir/a/b/c/d &&
+   date >dir/a/e &&
+   date >dir/d/e/d/a &&
+
+   test-dir-iterator ./dir >it &&
+   test "$(sort it)" == "$ITER_SORTED_OUTPUT"
+'
+
+ITER_POST_ORDER_OUTPUT='[f] (a/b/c/d) ./dir2/a/b/c/d
+[d] (a/b/c) ./dir2/a/b/c
+[d] (a/b) ./dir2/a/b
+[d] (a) ./dir2/a'
+
+test_expect_success 'dir-iterator should list files properly on post-order 
mode' '
+   mkdir -p dir2/a/b/c/ &&
+   date >dir2/a/b/c/d &&
+
+   test "$(test-dir-iterator ./dir2 --post-order)" == 
"$ITER_POST_ORDER_OUTPUT"
+'
+
+test_done
-- 
2.7.4 (Apple Git-66)



[PATCH v5 0/6] [GSoC] remove_subtree(): reimplement using iterators

2017-03-29 Thread Daniel Ferreira
This is the fifth version of a patch series that implements the GSoC
microproject of converting a recursive call to readdir() to use dir_iterator.

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
v3: 
https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t
v4: 
https://public-inbox.org/git/1490747533-89143-1-git-send-email-bnm...@gmail.com/T/#e437a63e0c22c00c69b5d92977c9b438ed2b9fd3a

I would like to really thank Michael for the incredibly thorough review of
the last version of this series. I never expected anyone to give that
level of attention to this change, and it's really, really appreciated.

All of the points he addressed are fixed in this version. As always, more
feedback is greatly appreciated.

Thanks,
Daniel.

Daniel Ferreira (6):
  dir_iterator: add helpers to dir_iterator_advance
  dir_iterator: refactor state machine model
  dir_iterator: iterate over dir after its contents
  dir_iterator: add tests for dir_iterator API
  remove_subtree(): reimplement using iterators
  remove_subtree(): test removing nested directories

 Makefile|   1 +
 dir-iterator.c  | 123 ++--
 dir-iterator.h  |  17 --
 entry.c |  41 +-
 refs/files-backend.c|   2 +-
 t/helper/test-dir-iterator.c|  32 +++
 t/t0065-dir-iterator.sh |  45 +++
 t/t2000-checkout-cache-clash.sh |  11 
 8 files changed, 210 insertions(+), 62 deletions(-)
 create mode 100644 t/helper/test-dir-iterator.c
 create mode 100755 t/t0065-dir-iterator.sh

--
2.7.4 (Apple Git-66)



[PATCH v5 3/6] dir_iterator: iterate over dir after its contents

2017-03-29 Thread Daniel Ferreira
Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18).

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned "depth-first" iteration mode to be enabled. Currently,
the only acceptable flag is DIR_ITERATOR_DEPTH_FIRST.

This is useful for recursively removing a directory and calling rmdir()
on a directory only after all of its contents have been wiped.

Amend a call to dir_iterator_begin() to pass the flags parameter
introduced.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c   | 53 
 dir-iterator.h   | 17 -
 refs/files-backend.c |  2 +-
 3 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 3ac984b..05d53d2 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -47,6 +47,9 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };
 
 static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
@@ -113,12 +116,14 @@ int dir_iterator_advance(struct dir_iterator 
*dir_iterator)
iter->base.path.buf, strerror(errno));
/* Popping the level is handled below */
}
-   } else if (S_ISDIR(iter->base.st.st_mode)) {
+   } else if (S_ISDIR(iter->base.st.st_mode) &&
+   !(iter->flags & DIR_ITERATOR_POST_ORDER_TRAVERSAL)) {
if (level->dir_state == DIR_STATE_ITER) {
/*
 * The directory was just iterated
 * over; now prepare to iterate into
-* it.
+* it (unless an option is set for us
+* to do otherwise).
 */
push_dir_level(iter, level);
continue;
@@ -152,7 +157,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
de = readdir(level->dir);
 
if (!de) {
-   /* This level is exhausted; pop up a level. */
+   /* This level is exhausted  */
if (errno) {
warning("error reading directory %s: 
%s",
iter->base.path.buf, 
strerror(errno));
@@ -160,6 +165,32 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
warning("error closing directory %s: 
%s",
iter->base.path.buf, 
strerror(errno));
 
+   if (iter->flags & 
DIR_ITERATOR_POST_ORDER_TRAVERSAL) {
+   /* If we are handling dirpaths after 
their contents,
+* we have to iterate over the 
directory now that we'll
+* have finished iterating into it. */
+   level->dir = NULL;
+
+   if (pop_dir_level(iter) == 0)
+   return 
dir_iterator_abort(dir_iterator);
+
+   level = >levels[iter->levels_nr - 
1];
+   /* Since we are iterating through the 
dirpath
+* after we have gone through it, we 
still need
+* to get rid of the trailing slash we 
appended.
+*
+* This may generate issues if we ever 
want to
+* iterate through the root directory 
AND have
+* post-order traversal enabled.
+*/
+   strbuf_strip_suffix(>base.path, 
"/");
+
+   if (set_iterator_data(iter, level))
+   continue;
+
+   return ITER_OK;
+   }
+
level->dir = NULL;
if (pop_dir_level(iter) == 0)
 

[PATCH v5 1/6] dir_iterator: add helpers to dir_iterator_advance

2017-03-29 Thread Daniel Ferreira
Create inline helpers to dir_iterator_advance(). Make
dir_iterator_advance()'s code more legible and allow some behavior to
be reusable.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 65 +-
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..ce8bf81 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,43 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };

+static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static inline int pop_dir_level(struct dir_iterator_int *iter)
+{
+   return --iter->levels_nr;
+}
+
+static inline int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -84,11 +121,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * over; now prepare to iterate into
 * it.
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +137,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);

continue;
@@ -129,7 +162,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, 
strerror(errno));

level->dir = NULL;
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter) == 0)
return dir_iterator_abort(dir_iterator);
break;
}
@@ -138,23 +171,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
continue;

strbuf_addstr(>base.path, de->d_name);
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
-   if (errno != ENOENT)
-   warning("error reading path '%s': %s",
-   iter->base.path.buf,
-   strerror(errno));
-   continue;
-   }

-   /*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
-*/
-   iter->base.relative_path =
-   iter->base.path.buf + 
iter->levels[0].prefix_len;
-   iter->base.basename =
-   iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
+   if (set_iterator_data(iter, level))
+   continue;

return ITER_OK;
}
--
2.7.4 (Apple Git-66)



[PATCH v5 2/6] dir_iterator: refactor state machine model

2017-03-29 Thread Daniel Ferreira
Remove the "initialized" member of dir_iterator_level. Replace its
functionality with a DIR_STATE_PUSH state in the
dir_iterator_level.dir_state enum.

This serves to remove a redundant property in the dir_iterator_level
struct and ease comprehension of the state machine's behavior.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index ce8bf81..3ac984b 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"

 struct dir_iterator_level {
-   int initialized;
-
DIR *dir;

/*
@@ -20,6 +18,7 @@ struct dir_iterator_level {
 * iteration and also iterated into):
 */
enum {
+   DIR_STATE_PUSH,
DIR_STATE_ITER,
DIR_STATE_RECURSE
} dir_state;
@@ -55,8 +54,10 @@ static inline void push_dir_level(struct dir_iterator_int 
*iter, struct dir_iter
level->dir_state = DIR_STATE_RECURSE;
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
   iter->levels_alloc);
+
+   /* Push a new level */
level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   level->dir_state = DIR_STATE_PUSH;
 }

 static inline int pop_dir_level(struct dir_iterator_int *iter)
@@ -97,7 +98,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
>levels[iter->levels_nr - 1];
struct dirent *de;

-   if (!level->initialized) {
+   if (level->dir_state == DIR_STATE_PUSH) {
/*
 * Note: dir_iterator_begin() ensures that
 * path is not the empty string.
@@ -112,8 +113,6 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, strerror(errno));
/* Popping the level is handled below */
}
-
-   level->initialized = 1;
} else if (S_ISDIR(iter->base.st.st_mode)) {
if (level->dir_state == DIR_STATE_ITER) {
/*
@@ -215,7 +214,7 @@ struct dir_iterator *dir_iterator_begin(const char *path)
ALLOC_GROW(iter->levels, 10, iter->levels_alloc);

iter->levels_nr = 1;
-   iter->levels[0].initialized = 0;
+   iter->levels[0].dir_state = DIR_STATE_PUSH;

return dir_iterator;
 }
--
2.7.4 (Apple Git-66)



[PATCH v4 3/5] remove_subtree(): reimplement using iterators

2017-03-28 Thread Daniel Ferreira
Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 31 ++-
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/entry.c b/entry.c
index c6eea24..bbebd16 100644
--- a/entry.c
+++ b/entry.c
@@ -2,6 +2,8 @@
 #include "blob.h"
 #include "dir.h"
 #include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"

 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -46,29 +48,16 @@ static void create_directories(const char *path, int 
path_len,

 static void remove_subtree(struct strbuf *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
+   struct dir_iterator *diter = dir_iterator_begin(path->buf, 
DIR_ITERATOR_DEPTH_FIRST);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", path->buf);
+   } else if (unlink(diter->path.buf))
die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
}
-   closedir(dir);
+
if (rmdir(path->buf))
die_errno("cannot rmdir '%s'", path->buf);
 }
--
2.7.4 (Apple Git-66)



[PATCH v4 2/5] dir_iterator: iterate over dir after its contents

2017-03-28 Thread Daniel Ferreira
Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18).

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned "depth-first" iteration mode to be enabled. Currently,
the only acceptable flag is DIR_ITERATOR_DEPTH_FIRST.

This is useful for recursively removing a directory and calling rmdir()
on a directory only after all of its contents have been wiped.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 46 ++
 dir-iterator.h | 14 +++---
 2 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 853c040..545d333 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -48,6 +48,9 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };

 static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
@@ -114,12 +117,14 @@ int dir_iterator_advance(struct dir_iterator 
*dir_iterator)
}

level->initialized = 1;
-   } else if (S_ISDIR(iter->base.st.st_mode)) {
+   } else if (S_ISDIR(iter->base.st.st_mode) &&
+   !iter->flags & DIR_ITERATOR_DEPTH_FIRST) {
if (level->dir_state == DIR_STATE_ITER) {
/*
 * The directory was just iterated
 * over; now prepare to iterate into
-* it.
+* it (unless an option is set for us
+* to do otherwise).
 */
push_dir_level(iter, level);
continue;
@@ -153,10 +158,27 @@ int dir_iterator_advance(struct dir_iterator 
*dir_iterator)
de = readdir(level->dir);

if (!de) {
-   /* This level is exhausted; pop up a level. */
+   /* This level is exhausted  */
if (errno) {
warning("error reading directory %s: 
%s",
iter->base.path.buf, 
strerror(errno));
+   } else if (iter->flags & 
DIR_ITERATOR_DEPTH_FIRST) {
+   /* If we are handling dirpaths after 
their contents,
+* we have to iterate over the 
directory now that we'll
+* have finished iterating into it. */
+   level->dir = NULL;
+
+   if (pop_dir_level(iter, level) == 0)
+   return 
dir_iterator_abort(dir_iterator);
+
+   level = >levels[iter->levels_nr - 
1];
+   /* Remove a trailing slash */
+   strbuf_strip_suffix(>base.path, 
"/");
+
+   if (set_iterator_data(iter, level))
+   continue;
+
+   return ITER_OK;
} else if (closedir(level->dir))
warning("error closing directory %s: 
%s",
iter->base.path.buf, 
strerror(errno));
@@ -175,8 +197,22 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
if (set_iterator_data(iter, level))
continue;

+   /*
+* If we want to iterate dirs after files, we shall
+* begin looking into them *before* we return the dir
+* itself.
+*/
+   if (S_ISDIR(iter->base.st.st_mode) &&
+   iter->flags & DIR_ITERATOR_DEPTH_FIRST) {
+   push_dir_level(iter, level);
+   goto continue_outer_loop;
+   }
+
return ITER_OK;
}
+
+continue_outer_loop:
+   ;
}
 }

@@ -201,7 +237,7 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator)
return ITER_DONE;
 }

-struct d

[PATCH v4 5/5] files_reflog_iterator: amend use of dir_iterator

2017-03-28 Thread Daniel Ferreira
Amend a call to dir_iterator_begin() to pass the flags parameter
introduced in 3efb5c0 ("dir_iterator: iterate over dir after its
contents", 2017-28-03).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 refs/files-backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index 50188e9..b4bba74 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -3346,7 +3346,7 @@ static struct ref_iterator 
*files_reflog_iterator_begin(struct ref_store *ref_st
files_downcast(ref_store, 0, "reflog_iterator_begin");

base_ref_iterator_init(ref_iterator, _reflog_iterator_vtable);
-   iter->dir_iterator = dir_iterator_begin(git_path("logs"));
+   iter->dir_iterator = dir_iterator_begin(git_path("logs"), 0);
return ref_iterator;
 }

--
2.7.4 (Apple Git-66)



[PATCH v4 1/5] dir_iterator: add helpers to dir_iterator_advance

2017-03-28 Thread Daniel Ferreira
Create inline helpers to dir_iterator_advance(). Make
dir_iterator_advance()'s code more legible and allow some behavior to
be reusable.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 65 +-
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..853c040 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,43 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };

+static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static inline int pop_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   return --iter->levels_nr;
+}
+
+static inline int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -84,11 +121,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * over; now prepare to iterate into
 * it.
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +137,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter, level) == 0)
return dir_iterator_abort(dir_iterator);

continue;
@@ -129,7 +162,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
iter->base.path.buf, 
strerror(errno));

level->dir = NULL;
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter, level) == 0)
return dir_iterator_abort(dir_iterator);
break;
}
@@ -138,23 +171,9 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
continue;

strbuf_addstr(>base.path, de->d_name);
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
-   if (errno != ENOENT)
-   warning("error reading path '%s': %s",
-   iter->base.path.buf,
-   strerror(errno));
-   continue;
-   }

-   /*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
-*/
-   iter->base.relative_path =
-   iter->base.path.buf + 
iter->levels[0].prefix_len;
-   iter->base.basename =
-   iter->base.path.buf + level->prefix_len;
-   level->dir_state = DIR_STATE_ITER;
+   if (set_iterator_data(iter, level))
+   continue;

return ITER_OK;
}
--
2.7.4 (Apple Git-66)



[PATCH v4 0/5] [GSoC] remove_subtree(): reimplement using iterators

2017-03-28 Thread Daniel Ferreira
Hi there,

This is the fourth version of the GSoC microproject of refactoring
remove_subtree() from recursively using readdir() to use dir_iterator. Below
are the threads for other versions:

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#t
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t
v3: 
https://public-inbox.org/git/CAGZ79kYtpmURSQWPumobA=e3jbfjkhwcdv_lphkcd71zrwm...@mail.gmail.com/T/#t

In this version of the patch, I followed Michael's suggestion
of splitting the commits responsible for adding the new feature to
dir_iterator. His suggestion of using flags instead of an options
struct has also been adopted.

This version also contains a test that is finally able to test the
function decently (not the one Stefan had suggested, which
did not result in a call to remove_subtree). I am just unsure about
its location in t/. I'd appreciate suggestions to put it in a more
decent home.

Daniel Ferreira (5):
  dir_iterator: add helpers to dir_iterator_advance
  dir_iterator: iterate over dir after its contents
  remove_subtree(): reimplement using iterators
  remove_subtree(): test removing nested directories
  files_reflog_iterator: amend use of dir_iterator

 dir-iterator.c  | 105 +++-
 dir-iterator.h  |  14 --
 entry.c |  31 
 refs/files-backend.c|   2 +-
 t/t2000-checkout-cache-clash.sh |  11 +
 5 files changed, 114 insertions(+), 49 deletions(-)

--
2.7.4 (Apple Git-66)



[PATCH v4 4/5] remove_subtree(): test removing nested directories

2017-03-28 Thread Daniel Ferreira
Test removing a nested directory when an attempt is made to restore the
index to a state where it does not exist. A similar test could be found
previously in t/t2000-checkout-cache-clash.sh, but it did not check for
nested directories, which could allow a faulty implementation of
remove_subtree() pass the tests.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 t/t2000-checkout-cache-clash.sh | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/t/t2000-checkout-cache-clash.sh b/t/t2000-checkout-cache-clash.sh
index de3edb5..ac10ba3 100755
--- a/t/t2000-checkout-cache-clash.sh
+++ b/t/t2000-checkout-cache-clash.sh
@@ -57,4 +57,15 @@ test_expect_success SYMLINKS 'checkout-index -f twice with 
--prefix' '
git checkout-index -a -f --prefix=there/
 '

+test_expect_success 'git checkout-index -f should remove nested subtrees' '
+   echo content >path &&
+   git update-index --add path &&
+   rm path &&
+   mkdir -p path/with/nested/paths &&
+   echo content >path/file1 &&
+   echo content >path/with/nested/paths/file2 &&
+   git checkout-index -f -a &&
+   test ! -d path
+'
+
 test_done
--
2.7.4 (Apple Git-66)



[GSoC] Proposal: turn git-add--interactive.perl into a builtin

2017-03-25 Thread Daniel Ferreira (theiostream)
Hi there. First of all, I'd like to thank all of the support up to now
with my microproject :). Here's a first draft of my proposal for
Google Summer of Code '17, based on the "Convert scripts to builtins"
idea. Please let me know what you think.

---

SYNOPSIS
There are many advantages to converting parts of git that are still
scripts to C builtins, among which execution speed, improved
compatibility and code deduplication. This proposal aims to apply this
to git-add--interactive, one of the most useful features of Git.

FEASIBILITY
Many git scripts have attracted attention for being turned into
builtins. There is ongoing work on git-stash
(https://public-inbox.org/git/20170321053135.thk77soxc4irx...@sigill.intra.peff.net/),
and porting interactive rebase is one of the ideas for this edition of
GSoC. Not as much attention, however, has been directed to
git-add--interactive.

There was only one discussion regarding the feasibility of its porting
(https://public-inbox.org/git/CAP8UFD2PcBsU6=FK4OHVrB7E98ycohS_0pYcbCBar=of1hl...@mail.gmail.com/).
It resulted in a consensus that doing it would be a task too large –
although interesting – for GSoC 2015 based on the amount of its lines
of code. It is, however, only a few lines larger than
git-rebase--interactive, which has been considered an appropriate
idea. As such, it looks like a possible project for three months of
full-time work.

Aside from the benefits cited above, turning git-add--interactive into
a builtin can reduce Git's dependency on Perl to the point where no
"common" command would continue to rely on it.

PROJECTED TIMELINE
- Prior to May 4
-- Refine my basic knowledge of Perl
-- Craft one or two small patches to some of Git's Perl components
(preferentially to git-add--interactive itself) to improve my
understanding of the language and of how Git's Perl scripts actually
work

- May 4 - May 30
-- Clarify implementation details with my mentor, and work on a more
detailed roadmap for the project
-- Investigate roughly how to replace command invocations from the
script with actual builtin functions; which Git APIs in Perl already
have functional equivalents in C; which parts will require a full
rewrite.

- May 30 - June 30 (start of coding period)
-- Define the architecture of the builtin within git (which
functions/interfaces will it have? where will its code reside?).
-- Implement a small subset of the builtin (to be defined with my
mentor) and glue it into the existing Perl script. Present this as a
first patch to get feedback early regarding the implementation and
avoid piling up mistakes early.
-- Do necessary changes based on this initial review.
-- Have roughly 1/3 of the script's functionality ported to C.

- June 30 - July 28
-- Port the remainder of the script to a builtin.
-- Have a weekly roadmap, sending a part of the patch every 15 days to
the mailing list for review and to avoid massive commits by the end of
GSoC.
-- Apply suggestions from community reviews when possible; if not,
save them for doing toward the end of GSoC (see below).
(Note: due to a previous commitment, during a five-day period of July
I will only be able to work part-time on GSoC. The actual week will be
known over the next weeks.)

- July 28 - August 29
-- By the start of this period, send a patch with the builtin fully
implemented to the mailing list.
-- Fix bugs, test extensively, possibly extend test coverage for
git-add--interactive.
-- Respond to the (predictably big) community feedback regarding the change.

I currently work full-time in a payments company (see below), but in
case of being accepted I am willing to quit my job some months early
to dedicate myself fully to GSoC starting June.

BIOGRAPHICAL INFORMATION
My name is Daniel Ferreira and I'm a student from São Paulo, Brazil. I
was accepted by Stanford University last year and I will start college
this fall. I started coding C about six years ago writing up system
modifications ("tweaks") for jailbroken iPhones. Since then, I have
written/contributed to a couple of open-source projects like an IRC
bot and other assorted things – all of them tracked on Git
(https://github.com/theiostream). I have also developed a
(closed-source) library in C for interacting with payment terminals in
the company I have worked for over the last two years (Pagar.me).
There, we use Git extensively for managing projects with around 20
people working concurrently.

MICROPROJECT
I have sent a series of patches to complete the microproject of
converting recursive calls to readdir() into calls to dir_iterator.
The most recent version can be found in
https://public-inbox.org/git/1490465551-71056-2-git-send-email-bnm...@gmail.com/T/#u.

Thanks,
-- Daniel.


[PATCH v3 1/2] [GSoC] dir_iterator: iterate over dir after its contents

2017-03-25 Thread Daniel Ferreira
Create an option for the dir_iterator API to iterate over a directory
path only after having iterated through its contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18).

This is useful for recursively removing a directory and calling rmdir()
on a directory only after all of its contents have been wiped.

An "options" member has been added to the dir_iterator struct. It
contains the "iterate_dirs_after_files" flag, that enables the feature
when set to 1. Default behavior continues to be iterating over directory
paths before its contents.

Two inline functions have been added to dir_iterator's code to avoid
code repetition inside dir_iterator_advance() and make code more clear.

No particular functions or wrappers for setting the options struct's
fields have been added to avoid either breaking the current dir_iterator
API or over-engineering an extremely simple option architecture.

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 dir-iterator.c | 100 -
 dir-iterator.h |   7 
 2 files changed, 84 insertions(+), 23 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index 34182a9..833d56a 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -50,6 +50,43 @@ struct dir_iterator_int {
struct dir_iterator_level *levels;
 };

+static inline void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   level->dir_state = DIR_STATE_RECURSE;
+   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
+  iter->levels_alloc);
+   level = >levels[iter->levels_nr++];
+   level->initialized = 0;
+}
+
+static inline int pop_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   return --iter->levels_nr;
+}
+
+static inline int set_iterator_data(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+{
+   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   if (errno != ENOENT)
+   warning("error reading path '%s': %s",
+   iter->base.path.buf,
+   strerror(errno));
+   return -1;
+   }
+
+   /*
+* We have to set these each time because
+* the path strbuf might have been realloc()ed.
+*/
+   iter->base.relative_path =
+   iter->base.path.buf + iter->levels[0].prefix_len;
+   iter->base.basename =
+   iter->base.path.buf + level->prefix_len;
+   level->dir_state = DIR_STATE_ITER;
+
+   return 0;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
struct dir_iterator_int *iter =
@@ -77,18 +114,16 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
}

level->initialized = 1;
-   } else if (S_ISDIR(iter->base.st.st_mode)) {
+   } else if (S_ISDIR(iter->base.st.st_mode) &&
+   !iter->base.options.iterate_dirs_after_files) {
if (level->dir_state == DIR_STATE_ITER) {
/*
 * The directory was just iterated
 * over; now prepare to iterate into
-* it.
+* it (unless an option is set for us
+* to do otherwise).
 */
-   level->dir_state = DIR_STATE_RECURSE;
-   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-  iter->levels_alloc);
-   level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   push_dir_level(iter, level);
continue;
} else {
/*
@@ -104,7 +139,7 @@ int dir_iterator_advance(struct dir_iterator *dir_iterator)
 * This level is exhausted (or wasn't opened
 * successfully); pop up a level.
 */
-   if (--iter->levels_nr == 0)
+   if (pop_dir_level(iter, level) == 0)
return dir_iterator_abort(dir_iterator);

continue;
@@ -120,16 +155,33 @@ int dir_iterator_advance(struct dir_iterator 
*dir_iterator)
de = readdir(level->dir);

if (!de) {
-   /* This level is exhausted; pop up a level. */
+   /* 

[PATCH v3 2/2] [GSoC] remove_subtree(): reimplement using iterators

2017-03-25 Thread Daniel Ferreira
Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---
 entry.c | 32 +++-
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/entry.c b/entry.c
index c6eea24..670ffeb 100644
--- a/entry.c
+++ b/entry.c
@@ -2,6 +2,8 @@
 #include "blob.h"
 #include "dir.h"
 #include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"

 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -46,29 +48,17 @@ static void create_directories(const char *path, int 
path_len,

 static void remove_subtree(struct strbuf *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
+   struct dir_iterator *diter = dir_iterator_begin(path->buf);
+   diter->options.iterate_dirs_after_files = 1;
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (S_ISDIR(diter->st.st_mode)) {
+   if (rmdir(diter->path.buf))
+   die_errno("cannot rmdir '%s'", path->buf);
+   } else if (unlink(diter->path.buf))
die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
}
-   closedir(dir);
+
if (rmdir(path->buf))
die_errno("cannot rmdir '%s'", path->buf);
 }
--
2.7.4 (Apple Git-66)



[PATCH v3 0/2] [GSoC] remove_subtree(): reimplement using iterators

2017-03-25 Thread Daniel Ferreira
This is the third version of the GSoC microproject
of refactoring remove_subtree() from recursively using
readdir() to use dir_iterator. Below are the threads for
other versions:

v1: 
https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#mae023e7a7d7626f00e0923833c4359f5af493730
v2: 
https://public-inbox.org/git/cacsjy8dxh-qpbblfafwpawusba9gvxa7x+mxljevykhk1zo...@mail.gmail.com/T/#t

Duy suggested adding features to dir_iterator might go
beyond the intention of a microproject, but I figured I
might go for it to learn more about the project.

The dir_iterator reimplementation has been tested in a
separate binary I created (and linked with libgit.a) to
reproduce remove_subtree()'s contents. As pointed out in the
last thread, git's tests for this function were unable to
catch a daunting bug I had introduced, and I still haven't
been able to come up with a way to reproduce remove_subtree()
being called. Any help?

Thank you all again for all the reviews.

Daniel Ferreira (2):
  dir_iterator: iterate over dir after its contents
  remove_subtree(): reimplement using iterators

 dir-iterator.c | 100 -
 dir-iterator.h |   7 
 entry.c|  32 +++---
 3 files changed, 95 insertions(+), 44 deletions(-)

--
2.7.4 (Apple Git-66)



Re: [PATCH v2] [GSoC] remove_subtree(): reimplement using iterators

2017-03-25 Thread Daniel Ferreira (theiostream)
You are correct, which shows that since all tests pass, we need to
come up with better cases for this function.

As for a solution, I believe that the best way to go for it is to
dir_iterator's implementation to have an "Option to iterate over
directory paths before vs. after their contents" (something predicted
in the commit that created it). If it iterates over directories after
all of its contents (currently it does so before) we just need to
check if the entry is a directory and if so, rmdir() it. Does that
make sense?

On Sat, Mar 25, 2017 at 8:51 AM, Duy Nguyen <pclo...@gmail.com> wrote:
> On Sat, Mar 25, 2017 at 6:27 PM, Daniel Ferreira <bnm...@gmail.com> wrote:
>> Use dir_iterator to traverse through remove_subtree()'s directory tree,
>> avoiding the need for recursive calls to readdir(). Simplify
>> remove_subtree()'s code.
>>
>> A conversion similar in purpose was previously done at 46d092a
>> ("for_each_reflog(): reimplement using iterators", 2016-05-21).
>>
>> Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
>> ---
>>
>> This is a second-version patch of the Google Summer of Code microproject for
>> refactoring recursive readdir() calls to use dir_iterator instead. v1 can be
>> found in:
>>
>> https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#mae023e7a7d7626f00e0923833c4359f5af493730
>>
>> Additionally, for debugging purposes I turned remove_subtree() into a no-op
>> and ran git tests. Some failures were at:
>>
>> * t2000-checkout-cache-clash.sh
>> * t2003-checkout-cache-mkdir.sh
>>
>> If you guys could check those files out and warn me if any additional tests
>> would be welcome, please let me know.
>>
>> Thanks.
>>
>>  entry.c | 28 +++-
>>  1 file changed, 7 insertions(+), 21 deletions(-)
>>
>> diff --git a/entry.c b/entry.c
>> index c6eea240b..3cb92592d 100644
>> --- a/entry.c
>> +++ b/entry.c
>> @@ -2,6 +2,8 @@
>>  #include "blob.h"
>>  #include "dir.h"
>>  #include "streaming.h"
>> +#include "iterator.h"
>> +#include "dir-iterator.h"
>>
>>  static void create_directories(const char *path, int path_len,
>>const struct checkout *state)
>> @@ -46,29 +48,13 @@ static void create_directories(const char *path, int 
>> path_len,
>>
>>  static void remove_subtree(struct strbuf *path)
>>  {
>> -   DIR *dir = opendir(path->buf);
>> -   struct dirent *de;
>> -   int origlen = path->len;
>> -
>> -   if (!dir)
>> -   die_errno("cannot opendir '%s'", path->buf);
>> -   while ((de = readdir(dir)) != NULL) {
>> -   struct stat st;
>> -
>> -   if (is_dot_or_dotdot(de->d_name))
>> -   continue;
>> -
>> -   strbuf_addch(path, '/');
>> -   strbuf_addstr(path, de->d_name);
>> -   if (lstat(path->buf, ))
>> -   die_errno("cannot lstat '%s'", path->buf);
>> -   if (S_ISDIR(st.st_mode))
>> -   remove_subtree(path);
>> -   else if (unlink(path->buf))
>> +   struct dir_iterator *diter = dir_iterator_begin(path->buf);
>> +
>> +   while (dir_iterator_advance(diter) == ITER_OK) {
>> +   if (unlink(diter->path.buf))
>> die_errno("cannot unlink '%s'", path->buf);
>> -   strbuf_setlen(path, origlen);
>> }
>> -   closedir(dir);
>> +
>> if (rmdir(path->buf))
>> die_errno("cannot rmdir '%s'", path->buf);
>
> Even though it's very nice that lots of code is deleted. This is not
> entirely correct, is it? Before this patch, rmdir() is called for
> every recursive remove_subtree() call. After this patch, it's only
> called once (and likely fails unless you have no subdirectories).
>
>>  }
>> --
>> 2.12.1.433.g82305b74f.dirty
>>
>
>
>
> --
> Duy


[PATCH v2] [GSoC] remove_subtree(): reimplement using iterators

2017-03-25 Thread Daniel Ferreira
Use dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir(). Simplify
remove_subtree()'s code.

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---

This is a second-version patch of the Google Summer of Code microproject for
refactoring recursive readdir() calls to use dir_iterator instead. v1 can be
found in:

https://public-inbox.org/git/CAGZ79kZwT-9mHTiOJ5CEjk2wDFkn6+NcogjX0=vjhsah16a...@mail.gmail.com/T/#mae023e7a7d7626f00e0923833c4359f5af493730

Additionally, for debugging purposes I turned remove_subtree() into a no-op
and ran git tests. Some failures were at:

* t2000-checkout-cache-clash.sh
* t2003-checkout-cache-mkdir.sh

If you guys could check those files out and warn me if any additional tests
would be welcome, please let me know.

Thanks.

 entry.c | 28 +++-
 1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/entry.c b/entry.c
index c6eea240b..3cb92592d 100644
--- a/entry.c
+++ b/entry.c
@@ -2,6 +2,8 @@
 #include "blob.h"
 #include "dir.h"
 #include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"

 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -46,29 +48,13 @@ static void create_directories(const char *path, int 
path_len,

 static void remove_subtree(struct strbuf *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
-   int origlen = path->len;
-
-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
-   strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
+   struct dir_iterator *diter = dir_iterator_begin(path->buf);
+
+   while (dir_iterator_advance(diter) == ITER_OK) {
+   if (unlink(diter->path.buf))
die_errno("cannot unlink '%s'", path->buf);
-   strbuf_setlen(path, origlen);
}
-   closedir(dir);
+
if (rmdir(path->buf))
die_errno("cannot rmdir '%s'", path->buf);
 }
--
2.12.1.433.g82305b74f.dirty



Re: [PATCH] [GSoC] remove_subtree(): reimplement using iterators

2017-03-24 Thread Daniel Ferreira (theiostream)
> On Fri, Mar 24, 2017 at 2:02 PM, Stefan Beller  wrote:

> Welcome to the Git community!

Thank you!

> Please use a more imperative style. (e.g. s/Uses/Use/ ...
> s/and simplfying/which simplifies/)

Thank you. Will do in a second version of this patch.

> Thanks for this link. It gives good context for reviewing the change,
> but it will not be good context to record as a commit message.
> (When someone looks at a commit message later on, they are usually trying
> to figure out what the author was thinking; if there were any special cases to
> be thought about. Was performance on the authors mind? etc)

> So I propose to put the link into the more informal section if a
> reroll is needed.

Perfect. I will remove it from the message.

> Instead of constructing the path again here based on relative path
> and the path parameter, I wonder if we could use
>
> if (unlink(diter->path))
> ..
>
> here? Then we would not need the strbuf at all?

Yes, we can! Thank you for the pointer. Will be in the next version of the
patch.

> Also we'd need to handle (empty) directories differently for removal?

>From what I've tested, we do not need to do it.

> Do we need to check the return code of dir_iterator_advance
> for ITER_ERROR as well?

I believe not – it only tries to perform an operation if we have ITER_OK. Since
ITER_ERROR would end up in a no-op anyway I don't see how a check for it
would be useful.

>
>
> > }
> > -   closedir(dir);
> > +
> > if (rmdir(path->buf))
> > die_errno("cannot rmdir '%s'", path->buf);
>
> This would remove the "top level" directory as given by path.
> When reading the dir-iterator code, I am not sure if this is
> also part of the yield in dir_iterator_advance.

I've tested it, and it does not yield in there.

Thank you for the advice, and as stated, will submit a v2 of the patch
in short notice.

Thank you,
Daniel.


[PATCH] [GSoC] remove_subtree(): reimplement using iterators

2017-03-23 Thread Daniel Ferreira
Uses dir_iterator to traverse through remove_subtree()'s directory tree,
avoiding the need for recursive calls to readdir() and simplifying code.

Suggested in the GSoC microproject list, as well as:
https://public-inbox.org/git/xmqqk27m4h3h@gitster.mtv.corp.google.com/

A conversion similar in purpose was previously done at 46d092a
("for_each_reflog(): reimplement using iterators", 2016-05-21).

Signed-off-by: Daniel Ferreira <bnm...@gmail.com>
---

Hey there! This is my microproject for Google Summer of Code on git.
It has passed on Travis CI (https://travis-ci.org/theiostream/git),
although I would appreciate any suggestion to improve test coverage
for the affected function.

This is, to my knowledge, one of the few microprojects that have not
yet been started by someone on this list, but please let me know if
someone else is already on it.

Thank you.

 entry.c | 24 +++-
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/entry.c b/entry.c
index c6eea24..a88c219 100644
--- a/entry.c
+++ b/entry.c
@@ -2,6 +2,8 @@
 #include "blob.h"
 #include "dir.h"
 #include "streaming.h"
+#include "iterator.h"
+#include "dir-iterator.h"

 static void create_directories(const char *path, int path_len,
   const struct checkout *state)
@@ -46,29 +48,17 @@ static void create_directories(const char *path, int 
path_len,

 static void remove_subtree(struct strbuf *path)
 {
-   DIR *dir = opendir(path->buf);
-   struct dirent *de;
+   struct dir_iterator *diter = dir_iterator_begin(path->buf);
int origlen = path->len;

-   if (!dir)
-   die_errno("cannot opendir '%s'", path->buf);
-   while ((de = readdir(dir)) != NULL) {
-   struct stat st;
-
-   if (is_dot_or_dotdot(de->d_name))
-   continue;
-
+   while (dir_iterator_advance(diter) == ITER_OK) {
strbuf_addch(path, '/');
-   strbuf_addstr(path, de->d_name);
-   if (lstat(path->buf, ))
-   die_errno("cannot lstat '%s'", path->buf);
-   if (S_ISDIR(st.st_mode))
-   remove_subtree(path);
-   else if (unlink(path->buf))
+   strbuf_addstr(path, diter->relative_path);
+   if (unlink(path->buf))
die_errno("cannot unlink '%s'", path->buf);
strbuf_setlen(path, origlen);
}
-   closedir(dir);
+
if (rmdir(path->buf))
die_errno("cannot rmdir '%s'", path->buf);
 }
--
2.7.4 (Apple Git-66)