Re: [PATCH v10 4/5] dir_iterator: rewrite state machine model
On 04/19/2017 03:14 PM, Daniel Ferreira wrote: > Perform a rewrite of dir_iterator_advance(). dir_iterator has > ceased to rely on a combination of level.initialized and level.dir_state > state variables and now only tracks the state with level.dir_state, > which simplifies the iterator mechanism, makes the code easier to follow > and eases additions of new features to the iterator. > > Make dir_iterator_begin() attempt to lstat() the path it receives, and > return NULL and an appropriate errno if it fails or if the passed path > was not a directory. > > Create an option for the dir_iterator API to iterate over subdirectories > only after having iterated through their contents. This feature was > predicted, although not implemented by 0fe5043 ("dir_iterator: new API > for iterating over a directory tree", 2016-06-18). This is useful for > recursively removing a directory and calling rmdir() on a directory only > after all of its contents have been wiped. > > Add an option for dir_iterator to also iterate over the initial > directory (the one passed to dir_iterator_begin()). > > Add the "flags" parameter to dir_iterator_create, allowing for the > aforementioned new features to be enabled. The new default behavior > (i.e. flags set to 0) does not iterate over directories. Flag > DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing > so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a > directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR > iterates over the initial directory. These flags do not conflict with > each other and may be used simultaneously. > > Amend a call to dir_iterator_begin() in refs/files-backend.c to pass > the flags parameter introduced, as well as handle the case in which it > fails to open the directory. > > Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to > test "post-order" and "iterate-over-root" modes. > > Michael Haggerty contributed with the design of the new > dir_iterator_advance() implementation, the code for > t/helper/test-dir-iterator's option parser and numerous reviews that > gradually shaped this code to its current form. > > Signed-off-by: Michael Haggerty> Signed-off-by: Daniel Ferreira > --- > dir-iterator.c | 220 > ++- > dir-iterator.h | 35 +-- > refs/files-backend.c | 51 ++ > t/helper/test-dir-iterator.c | 31 +- > t/t0065-dir-iterator.sh | 94 ++ > 5 files changed, 316 insertions(+), 115 deletions(-) > > diff --git a/dir-iterator.c b/dir-iterator.c > index d168cb2..3c20e0e 100644 > --- a/dir-iterator.c > +++ b/dir-iterator.c > @@ -4,8 +4,6 @@ > #include "dir-iterator.h" > > struct dir_iterator_level { > - int initialized; > - > DIR *dir; > > /* > @@ -20,9 +18,15 @@ struct dir_iterator_level { >* iteration and also iterated into): >*/ > enum { > - DIR_STATE_ITER, > - DIR_STATE_RECURSE > + DIR_STATE_PUSHED, > + DIR_STATE_PRE_ITERATION, > + DIR_STATE_ITERATING, > + DIR_STATE_POST_ITERATION, > + DIR_STATE_EXHAUSTED > } dir_state; > + > + /* The stat structure for the directory this level represents. */ > + struct stat st; > }; > > /* > @@ -48,15 +52,23 @@ struct dir_iterator_int { >* that will be included in this iteration. >*/ > struct dir_iterator_level *levels; > + > + /* Holds the flags passed to dir_iterator_begin(). */ > + unsigned flags; > }; > > -static void push_dir_level(struct dir_iterator_int *iter, struct > dir_iterator_level *level) > +static void push_dir_level(struct dir_iterator_int *iter, struct stat *st) > { > - level->dir_state = DIR_STATE_RECURSE; > + struct dir_iterator_level *level; > + > ALLOC_GROW(iter->levels, iter->levels_nr + 1, > iter->levels_alloc); > + > + /* Push a new level */ > level = >levels[iter->levels_nr++]; > - level->initialized = 0; > + level->dir = NULL; > + level->dir_state = DIR_STATE_PUSHED; > + level->st = *st; > } > > static int pop_dir_level(struct dir_iterator_int *iter) > @@ -67,7 +79,11 @@ static int pop_dir_level(struct dir_iterator_int *iter) > static int adjust_iterator_data(struct dir_iterator_int *iter, > struct dir_iterator_level *level) > { > - if (lstat(iter->base.path.buf, >base.st) < 0) { > + char *last_path_component; > + > + if (level->dir_state != DIR_STATE_ITERATING) { > + iter->base.st = level->st; > + } else if (lstat(iter->base.path.buf, >base.st) < 0) { > if (errno != ENOENT) > warning("error reading path '%s': %s", > iter->base.path.buf, > @@ -76,18 +92,48 @@ static int adjust_iterator_data(struct
Re: [PATCH v10 4/5] dir_iterator: rewrite state machine model
Daniel Ferreirawrites: > diff --git a/t/t0065-dir-iterator.sh b/t/t0065-dir-iterator.sh > index 46e5ce5..4c6632f 100755 > --- a/t/t0065-dir-iterator.sh > +++ b/t/t0065-dir-iterator.sh > @@ -15,31 +15,41 @@ test_expect_success 'setup' ' > >dir/d/e/d/a && > > mkdir -p dir2/a/b/c/ && > - >dir2/a/b/c/d > + >dir2/a/b/c/d && > + > + >file > ' > > -test_expect_success 'dir-iterator should iterate through all files' ' > - cat >expect-sorted-output <<-\EOF && > - [d] (a) [a] ./dir/a > - [d] (a/b) [b] ./dir/a/b > - [d] (a/b/c) [c] ./dir/a/b/c > - [d] (d) [d] ./dir/d > - [d] (d/e) [e] ./dir/d/e > - [d] (d/e/d) [d] ./dir/d/e/d > - [f] (a/b/c/d) [d] ./dir/a/b/c/d > - [f] (a/e) [e] ./dir/a/e > - [f] (b) [b] ./dir/b > - [f] (c) [c] ./dir/c > - [f] (d/e/d/a) [a] ./dir/d/e/d/a > - EOF > +cat >expect-sorted-output <<-\EOF && > +[d] (a) [a] ./dir/a > +[d] (a/b) [b] ./dir/a/b > +[d] (a/b/c) [c] ./dir/a/b/c > +[d] (d) [d] ./dir/d > +[d] (d/e) [e] ./dir/d/e > +[d] (d/e/d) [d] ./dir/d/e/d > +[f] (a/b/c/d) [d] ./dir/a/b/c/d > +[f] (a/e) [e] ./dir/a/e > +[f] (b) [b] ./dir/b > +[f] (c) [c] ./dir/c > +[f] (d/e/d/a) [a] ./dir/d/e/d/a > +EOF > > - test-dir-iterator ./dir >out && There is something fishy going on around here in this patch, pushing the code to prepare test vector out of test_expect_success block. A mistake in rebasing or something? If you need to reroll the series to update this part, please rename the test to t0066 and do remember to update the logmessage of a few commits that refer to t0065. Thanks.
[PATCH v10 4/5] dir_iterator: rewrite state machine model
Perform a rewrite of dir_iterator_advance(). dir_iterator has ceased to rely on a combination of level.initialized and level.dir_state state variables and now only tracks the state with level.dir_state, which simplifies the iterator mechanism, makes the code easier to follow and eases additions of new features to the iterator. Make dir_iterator_begin() attempt to lstat() the path it receives, and return NULL and an appropriate errno if it fails or if the passed path was not a directory. Create an option for the dir_iterator API to iterate over subdirectories only after having iterated through their contents. This feature was predicted, although not implemented by 0fe5043 ("dir_iterator: new API for iterating over a directory tree", 2016-06-18). This is useful for recursively removing a directory and calling rmdir() on a directory only after all of its contents have been wiped. Add an option for dir_iterator to also iterate over the initial directory (the one passed to dir_iterator_begin()). Add the "flags" parameter to dir_iterator_create, allowing for the aforementioned new features to be enabled. The new default behavior (i.e. flags set to 0) does not iterate over directories. Flag DIR_ITERATOR_PRE_ORDER_TRAVERSAL iterates over a directory before doing so over its contents. DIR_ITERATOR_POST_ORDER_TRAVERSAL iterates over a directory after doing so over its contents. DIR_ITERATOR_LIST_ROOT_DIR iterates over the initial directory. These flags do not conflict with each other and may be used simultaneously. Amend a call to dir_iterator_begin() in refs/files-backend.c to pass the flags parameter introduced, as well as handle the case in which it fails to open the directory. Improve t/t0065-dir-iterator.sh and t/helper/test-dir-iterator.c to test "post-order" and "iterate-over-root" modes. Michael Haggerty contributed with the design of the new dir_iterator_advance() implementation, the code for t/helper/test-dir-iterator's option parser and numerous reviews that gradually shaped this code to its current form. Signed-off-by: Michael HaggertySigned-off-by: Daniel Ferreira --- dir-iterator.c | 220 ++- dir-iterator.h | 35 +-- refs/files-backend.c | 51 ++ t/helper/test-dir-iterator.c | 31 +- t/t0065-dir-iterator.sh | 94 ++ 5 files changed, 316 insertions(+), 115 deletions(-) diff --git a/dir-iterator.c b/dir-iterator.c index d168cb2..3c20e0e 100644 --- a/dir-iterator.c +++ b/dir-iterator.c @@ -4,8 +4,6 @@ #include "dir-iterator.h" struct dir_iterator_level { - int initialized; - DIR *dir; /* @@ -20,9 +18,15 @@ struct dir_iterator_level { * iteration and also iterated into): */ enum { - DIR_STATE_ITER, - DIR_STATE_RECURSE + DIR_STATE_PUSHED, + DIR_STATE_PRE_ITERATION, + DIR_STATE_ITERATING, + DIR_STATE_POST_ITERATION, + DIR_STATE_EXHAUSTED } dir_state; + + /* The stat structure for the directory this level represents. */ + struct stat st; }; /* @@ -48,15 +52,23 @@ struct dir_iterator_int { * that will be included in this iteration. */ struct dir_iterator_level *levels; + + /* Holds the flags passed to dir_iterator_begin(). */ + unsigned flags; }; -static void push_dir_level(struct dir_iterator_int *iter, struct dir_iterator_level *level) +static void push_dir_level(struct dir_iterator_int *iter, struct stat *st) { - level->dir_state = DIR_STATE_RECURSE; + struct dir_iterator_level *level; + ALLOC_GROW(iter->levels, iter->levels_nr + 1, iter->levels_alloc); + + /* Push a new level */ level = >levels[iter->levels_nr++]; - level->initialized = 0; + level->dir = NULL; + level->dir_state = DIR_STATE_PUSHED; + level->st = *st; } static int pop_dir_level(struct dir_iterator_int *iter) @@ -67,7 +79,11 @@ static int pop_dir_level(struct dir_iterator_int *iter) static int adjust_iterator_data(struct dir_iterator_int *iter, struct dir_iterator_level *level) { - if (lstat(iter->base.path.buf, >base.st) < 0) { + char *last_path_component; + + if (level->dir_state != DIR_STATE_ITERATING) { + iter->base.st = level->st; + } else if (lstat(iter->base.path.buf, >base.st) < 0) { if (errno != ENOENT) warning("error reading path '%s': %s", iter->base.path.buf, @@ -76,18 +92,48 @@ static int adjust_iterator_data(struct dir_iterator_int *iter, } /* -* We have to set these each time because -* the path strbuf might have been realloc()ed. +* Check if we are dealing with the root directory as an +*