Re: [PATCH v10 4/5] dir_iterator: rewrite state machine model

2017-04-20 Thread Michael Haggerty
On 04/19/2017 03:14 PM, Daniel Ferreira wrote:
> Perform a rewrite of dir_iterator_advance(). dir_iterator has
> ceased to rely on a combination of level.initialized and level.dir_state
> state variables and now only tracks the state with level.dir_state,
> which simplifies the iterator mechanism, makes the code easier to follow
> and eases additions of new features to the iterator.
> 
> Make dir_iterator_begin() attempt to lstat() the path it receives, and
> return NULL and an appropriate errno if it fails or if the passed path
> was not a directory.
> 
> Create an option for the dir_iterator API to iterate over subdirectories
> only after having iterated through their contents. This feature was
> predicted, although not implemented by 0fe5043 ("dir_iterator: new API
> for iterating over a directory tree", 2016-06-18). This is useful for
> recursively removing a directory and calling rmdir() on a directory only
> after all of its contents have been wiped.
> 
> Add an option for dir_iterator to also iterate over the initial
> directory (the one passed to dir_iterator_begin()).
> 
> Add the "flags" parameter to dir_iterator_create, allowing for the
> aforementioned new features to be enabled. The new default behavior
> (i.e. flags set to 0) does not iterate over directories. Flag
> DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
> so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
> directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
> iterates over the initial directory. These flags do not conflict with
> each other and may be used simultaneously.
> 
> Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
> the flags parameter introduced, as well as handle the case in which it
> fails to open the directory.
> 
> Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to
> test "post-order" and "iterate-over-root" modes.
> 
> Michael Haggerty contributed with the design of the new
> dir_iterator_advance() implementation, the code for
> t/helper/test-dir-iterator's option parser and numerous reviews that
> gradually shaped this code to its current form.
> 
> Signed-off-by: Michael Haggerty 
> Signed-off-by: Daniel Ferreira 
> ---
>  dir-iterator.c   | 220 
> ++-
>  dir-iterator.h   |  35 +--
>  refs/files-backend.c |  51 ++
>  t/helper/test-dir-iterator.c |  31 +-
>  t/t0065-dir-iterator.sh  |  94 ++
>  5 files changed, 316 insertions(+), 115 deletions(-)
> 
> diff --git a/dir-iterator.c b/dir-iterator.c
> index d168cb2..3c20e0e 100644
> --- a/dir-iterator.c
> +++ b/dir-iterator.c
> @@ -4,8 +4,6 @@
>  #include "dir-iterator.h"
>  
>  struct dir_iterator_level {
> - int initialized;
> -
>   DIR *dir;
>  
>   /*
> @@ -20,9 +18,15 @@ struct dir_iterator_level {
>* iteration and also iterated into):
>*/
>   enum {
> - DIR_STATE_ITER,
> - DIR_STATE_RECURSE
> + DIR_STATE_PUSHED,
> + DIR_STATE_PRE_ITERATION,
> + DIR_STATE_ITERATING,
> + DIR_STATE_POST_ITERATION,
> + DIR_STATE_EXHAUSTED
>   } dir_state;
> +
> + /* The stat structure for the directory this level represents. */
> + struct stat st;
>  };
>  
>  /*
> @@ -48,15 +52,23 @@ struct dir_iterator_int {
>* that will be included in this iteration.
>*/
>   struct dir_iterator_level *levels;
> +
> + /* Holds the flags passed to dir_iterator_begin(). */
> + unsigned flags;
>  };
>  
> -static void push_dir_level(struct dir_iterator_int *iter, struct 
> dir_iterator_level *level)
> +static void push_dir_level(struct dir_iterator_int *iter, struct stat *st)
>  {
> - level->dir_state = DIR_STATE_RECURSE;
> + struct dir_iterator_level *level;
> +
>   ALLOC_GROW(iter->levels, iter->levels_nr + 1,
>  iter->levels_alloc);
> +
> + /* Push a new level */
>   level = >levels[iter->levels_nr++];
> - level->initialized = 0;
> + level->dir = NULL;
> + level->dir_state = DIR_STATE_PUSHED;
> + level->st = *st;
>  }
>  
>  static int pop_dir_level(struct dir_iterator_int *iter)
> @@ -67,7 +79,11 @@ static int pop_dir_level(struct dir_iterator_int *iter)
>  static int adjust_iterator_data(struct dir_iterator_int *iter,
>   struct dir_iterator_level *level)
>  {
> - if (lstat(iter->base.path.buf, >base.st) < 0) {
> + char *last_path_component;
> +
> + if (level->dir_state != DIR_STATE_ITERATING) {
> + iter->base.st = level->st;
> + } else if (lstat(iter->base.path.buf, >base.st) < 0) {
>   if (errno != ENOENT)
>   warning("error reading path '%s': %s",
>   iter->base.path.buf,
> @@ -76,18 +92,48 @@ static int adjust_iterator_data(struct 

Re: [PATCH v10 4/5] dir_iterator: rewrite state machine model

2017-04-19 Thread Junio C Hamano
Daniel Ferreira  writes:

> diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh
> index 46e5ce5..4c6632f 100755
> --- a/t/t0065-dir-iterator.sh
> +++ b/t/t0065-dir-iterator.sh
> @@ -15,31 +15,41 @@ test_expect_success 'setup' '
>   >dir/d/e/d/a &&
>  
>   mkdir -p dir2/a/b/c/ &&
> - >dir2/a/b/c/d
> + >dir2/a/b/c/d &&
> +
> + >file
>  '
>  
> -test_expect_success 'dir-iterator should iterate through all files' '
> - cat >expect-sorted-output <<-\EOF &&
> - [d] (a) [a] ./dir/a
> - [d] (a/b) [b] ./dir/a/b
> - [d] (a/b/c) [c] ./dir/a/b/c
> - [d] (d) [d] ./dir/d
> - [d] (d/e) [e] ./dir/d/e
> - [d] (d/e/d) [d] ./dir/d/e/d
> - [f] (a/b/c/d) [d] ./dir/a/b/c/d
> - [f] (a/e) [e] ./dir/a/e
> - [f] (b) [b] ./dir/b
> - [f] (c) [c] ./dir/c
> - [f] (d/e/d/a) [a] ./dir/d/e/d/a
> - EOF
> +cat >expect-sorted-output <<-\EOF &&
> +[d] (a) [a] ./dir/a
> +[d] (a/b) [b] ./dir/a/b
> +[d] (a/b/c) [c] ./dir/a/b/c
> +[d] (d) [d] ./dir/d
> +[d] (d/e) [e] ./dir/d/e
> +[d] (d/e/d) [d] ./dir/d/e/d
> +[f] (a/b/c/d) [d] ./dir/a/b/c/d
> +[f] (a/e) [e] ./dir/a/e
> +[f] (b) [b] ./dir/b
> +[f] (c) [c] ./dir/c
> +[f] (d/e/d/a) [a] ./dir/d/e/d/a
> +EOF
>  
> - test-dir-iterator ./dir >out &&

There is something fishy going on around here in this patch, pushing
the code to prepare test vector out of test_expect_success block.  A
mistake in rebasing or something?

If you need to reroll the series to update this part, please rename
the test to t0066 and do remember to update the logmessage of a few
commits that refer to t0065.

Thanks.


[PATCH v10 4/5] dir_iterator: rewrite state machine model

2017-04-19 Thread Daniel Ferreira
Perform a rewrite of dir_iterator_advance(). dir_iterator has
ceased to rely on a combination of level.initialized and level.dir_state
state variables and now only tracks the state with level.dir_state,
which simplifies the iterator mechanism, makes the code easier to follow
and eases additions of new features to the iterator.

Make dir_iterator_begin() attempt to lstat() the path it receives, and
return NULL and an appropriate errno if it fails or if the passed path
was not a directory.

Create an option for the dir_iterator API to iterate over subdirectories
only after having iterated through their contents. This feature was
predicted, although not implemented by 0fe5043 ("dir_iterator: new API
for iterating over a directory tree", 2016-06-18). This is useful for
recursively removing a directory and calling rmdir() on a directory only
after all of its contents have been wiped.

Add an option for dir_iterator to also iterate over the initial
directory (the one passed to dir_iterator_begin()).

Add the "flags" parameter to dir_iterator_create, allowing for the
aforementioned new features to be enabled. The new default behavior
(i.e. flags set to 0) does not iterate over directories. Flag
DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing
so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a
directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR
iterates over the initial directory. These flags do not conflict with
each other and may be used simultaneously.

Amend a call to dir_iterator_begin() in refs/files-backend.c to pass
the flags parameter introduced, as well as handle the case in which it
fails to open the directory.

Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to
test "post-order" and "iterate-over-root" modes.

Michael Haggerty contributed with the design of the new
dir_iterator_advance() implementation, the code for
t/helper/test-dir-iterator's option parser and numerous reviews that
gradually shaped this code to its current form.

Signed-off-by: Michael Haggerty 
Signed-off-by: Daniel Ferreira 
---
 dir-iterator.c   | 220 ++-
 dir-iterator.h   |  35 +--
 refs/files-backend.c |  51 ++
 t/helper/test-dir-iterator.c |  31 +-
 t/t0065-dir-iterator.sh  |  94 ++
 5 files changed, 316 insertions(+), 115 deletions(-)

diff --git a/dir-iterator.c b/dir-iterator.c
index d168cb2..3c20e0e 100644
--- a/dir-iterator.c
+++ b/dir-iterator.c
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"
 
 struct dir_iterator_level {
-   int initialized;
-
DIR *dir;
 
/*
@@ -20,9 +18,15 @@ struct dir_iterator_level {
 * iteration and also iterated into):
 */
enum {
-   DIR_STATE_ITER,
-   DIR_STATE_RECURSE
+   DIR_STATE_PUSHED,
+   DIR_STATE_PRE_ITERATION,
+   DIR_STATE_ITERATING,
+   DIR_STATE_POST_ITERATION,
+   DIR_STATE_EXHAUSTED
} dir_state;
+
+   /* The stat structure for the directory this level represents. */
+   struct stat st;
 };
 
 /*
@@ -48,15 +52,23 @@ struct dir_iterator_int {
 * that will be included in this iteration.
 */
struct dir_iterator_level *levels;
+
+   /* Holds the flags passed to dir_iterator_begin(). */
+   unsigned flags;
 };
 
-static void push_dir_level(struct dir_iterator_int *iter, struct 
dir_iterator_level *level)
+static void push_dir_level(struct dir_iterator_int *iter, struct stat *st)
 {
-   level->dir_state = DIR_STATE_RECURSE;
+   struct dir_iterator_level *level;
+
ALLOC_GROW(iter->levels, iter->levels_nr + 1,
   iter->levels_alloc);
+
+   /* Push a new level */
level = >levels[iter->levels_nr++];
-   level->initialized = 0;
+   level->dir = NULL;
+   level->dir_state = DIR_STATE_PUSHED;
+   level->st = *st;
 }
 
 static int pop_dir_level(struct dir_iterator_int *iter)
@@ -67,7 +79,11 @@ static int pop_dir_level(struct dir_iterator_int *iter)
 static int adjust_iterator_data(struct dir_iterator_int *iter,
struct dir_iterator_level *level)
 {
-   if (lstat(iter->base.path.buf, >base.st) < 0) {
+   char *last_path_component;
+
+   if (level->dir_state != DIR_STATE_ITERATING) {
+   iter->base.st = level->st;
+   } else if (lstat(iter->base.path.buf, >base.st) < 0) {
if (errno != ENOENT)
warning("error reading path '%s': %s",
iter->base.path.buf,
@@ -76,18 +92,48 @@ static int adjust_iterator_data(struct dir_iterator_int 
*iter,
}
 
/*
-* We have to set these each time because
-* the path strbuf might have been realloc()ed.
+* Check if we are dealing with the root directory as an
+*